Ejemplo n.º 1
0
def as_todo_item(line):
    """Build a TodoItem(text, priority) from one line of text.

    A line of the form "<text>, <priority>", where <priority> is a single
    character accepted by priority_keys_string(), gives
    TodoItem(text, int(priority)).  Any other line is returned whole as
    TodoItem(line, -1).
    """
    pattern = '^(?P<text>.*), (?P<priority>[%s])$' % priority_keys_string()
    found = re.match(pattern, line)
    TodoItem = namedtuple('TodoItem', 'text, priority')
    if found is None:
        # no trailing priority marker: keep the whole line, flag it with -1
        return TodoItem(line, -1)
    fields = found.groupdict()
    return TodoItem(fields['text'], int(fields['priority']))
Ejemplo n.º 2
0
def weightTargets(weights, entities, strip=False):
    """Pair every entity with its weight and select the best-scoring ones.

    Entities that have no entry in weights are given weight 0.

    Returns a tuple (bestElements, genomeWeights).  genomeWeights holds one
    (entityId, weight) record per entity; bestElements is what
    maxbio.bestScoreElements picks from those records by "weight", reduced
    to bare entity ids when strip is true.
    """
    EntityWeight = namedtuple.namedtuple("entityWeight", "entityId, weight")
    # weights.get(..., 0): anything not listed in weights scores 0
    genomeWeights = [EntityWeight(e, weights.get(e, 0)) for e in entities]
    bestElements = maxbio.bestScoreElements(genomeWeights, "weight")
    if not strip:
        return bestElements, genomeWeights
    # caller only wants the ids, so drop the weight information
    return [best.entityId for best in bestElements], genomeWeights
Ejemplo n.º 3
0
def weightTargets(weights, entities, strip=False):
    """Score each entity and return the top-scoring ones plus all scores.

    weights maps entity -> weight; an entity without an entry scores 0.

    The result is the tuple (bestElements, genomeWeights): genomeWeights is
    the list of (entityId, weight) records built for all entities, and
    bestElements is the selection made by maxbio.bestScoreElements on the
    "weight" field.  With strip set, bestElements contains entity ids only.
    """
    EntityWeight = namedtuple.namedtuple("entityWeight", "entityId, weight")

    def scored(entity):
        # unknown entities default to a score of 0
        return EntityWeight(entity, weights.get(entity, 0))

    genomeWeights = [scored(entity) for entity in entities]
    bestElements = maxbio.bestScoreElements(genomeWeights, "weight")
    if strip:
        # strip off the weight information, keeping just the ids
        bestElements = [winner.entityId for winner in bestElements]
    return bestElements, genomeWeights
Ejemplo n.º 4
0
def priorities():
    """Return the priorities recognised by this system.

    The result is a list of Priority(number, name, colour) records, numbered
    consecutively from 0 in the order listed below.
    """
    Priority = namedtuple('Priority', 'number, name, colour')
    # (name, colour) in ascending priority number
    table = (
        ('bug', 'red'),
        ('yesterday', 'magenta'),
        ('today', 'blue'),
        ('tomorrow', 'cyan'),
        ('feature', 'green'),
        ('wish', 'yellow'),
        ('text', 'gray'),
    )
    return [Priority(number, name, colour)
            for number, (name, colour) in enumerate(table)]
Ejemplo n.º 5
0
def parse_todo_line(line):
    """Parse a single line into a TodoItem(text, priority).

    Lines shaped like "<text>, <priority>" - the priority being one
    character from priority_keys_string() - produce
    TodoItem(text, int(priority)).  Lines that do not match are returned
    unchanged with a priority of -1.
    """
    keys = priority_keys_string()
    parsed = re.match('^(?P<text>.*), (?P<priority>[%s])$' % keys, line)
    TodoItem = namedtuple('TodoItem', 'text, priority')
    if parsed:
        return TodoItem(parsed.group('text'), int(parsed.group('priority')))
    # no recognisable priority suffix
    return TodoItem(line, -1)
Ejemplo n.º 6
0
def priorities():
    """List the recognised priorities as Priority(number, name, colour).

    Numbers run from 0 upwards and follow the order of the two parallel
    lists below.
    """
    Priority = namedtuple('Priority', 'number, name, colour')
    names = ['bug', 'yesterday', 'today', 'tomorrow', 'feature', 'wish',
             'text']
    colours = ['red', 'magenta', 'blue', 'cyan', 'green', 'yellow', 'gray']
    known = []
    for number, (name, colour) in enumerate(zip(names, colours)):
        known.append(Priority(number, name, colour))
    return known
Ejemplo n.º 7
0
def parse_todo_line(line):
    """Turn one line into a TodoItem(text, priority) record.

    When the line ends in ", <priority>" (one character out of
    priority_keys_string()), the text before it and the priority as an int
    are returned.  Otherwise the full line is returned with priority -1.
    """
    TodoItem = namedtuple('TodoItem', 'text, priority')

    def build(text, priority):
        return TodoItem(text, priority)

    matcher = re.compile('^(?P<text>.*), (?P<priority>[%s])$' %
                         priority_keys_string())
    hit = matcher.match(line)
    if hit is None:
        return build(line, -1)
    captured = hit.groupdict()
    return build(captured['text'], int(captured['priority']))
Ejemplo n.º 8
0
        class Value(namedtuple('%sValue' % self.__class__.__name__, cols)):
            """Named tuple over the columns in cols with element-wise arithmetic.

            Every operator combines the operands position by position and
            returns a new Value built with _make; because tuples are
            immutable, the in-place forms (+=, -=) also return a new object
            rather than modifying self.
            """

            def __add__(self, rhs):
                """Return the element-wise sum self + rhs."""
                return self._make(imap(operator.add, self, rhs))

            def __sub__(self, rhs):
                """Return the element-wise difference self - rhs."""
                # Fixed: this used operator.add, so "a - b" silently
                # returned "a + b" while "a -= b" subtracted correctly.
                return self._make(imap(operator.sub, self, rhs))

            def __iadd__(self, rhs):
                """Return the element-wise sum for "self += rhs"."""
                return self._make(imap(operator.add, self, rhs))

            def __isub__(self, rhs):
                """Return the element-wise difference for "self -= rhs"."""
                return self._make(imap(operator.sub, self, rhs))

            def __neg__(self):
                """Return a Value with every element negated."""
                return self._make(imap(operator.neg, self))
Ejemplo n.º 9
0
def loadBDTFromXML(weightsfn, name):
    #
    # load a BDT from an xml weights file
    #
    treedef = namedtuple('treedef', 'index args exprs limits')
    global trees
    if name in trees:
        print 'WARNING overriding existing definition for', name
    xmlfile = xmlreader(weightsfn)
    xmlbuff = xmlfile.buffer()
    rootnode = xmlparser.parse(xmlfile.handle())
    mvavars = [ ele.attrib['Label'] for ele in rootnode.xpath('//Variable') ]
    mvaexpr = [ ele.attrib['Title'] for ele in rootnode.xpath('//Variable') ]
    limits  = [ (float(ele.attrib['Min']), float(ele.attrib['Max'])) for ele in rootnode.xpath('//Variable') ]
    mvaargs = ','.join(mvaexpr) ## << variable expressions in HWWTree
    trees[name] = treedef(len(trees), mvaargs, mvaexpr, zip(mvavars, limits))
    reader = ROOT.tmva.load(name, str(xmlbuff), ':'.join(mvavars), trees[name].index)
    return reader
Ejemplo n.º 10
0
    def genomeSeqScore(indexedHits):
        """Count, per genome, how many sequences have at least one hit on it.

        indexedHits maps a sequence id to its hits; each hit exposes a
        genomeId.  Returns a list of GenomeScore(taxId, score) records after
        passing it through maxbio.sortList on "score".
        """
        GenomeScore = namedtuple.namedtuple("GenomeScore", "taxId, score")

        counts = {}
        for seqId, seqHits in indexedHits.iteritems():
            # a sequence counts once per genome, however many hits it has
            for genome in set(hit.genomeId for hit in seqHits):
                counts[genome] = counts.get(genome, 0) + 1

        scores = [GenomeScore(taxId, count) for taxId, count in counts.items()]

        # sorting only makes the debug message easier to read
        maxbio.sortList(scores, "score")
        logger.debug("genomeScores %s" % str(scores))
        return scores
Ejemplo n.º 11
0
    def genomeSeqScore(indexedHits):
        """Score each genome by the number of sequences that hit it.

        indexedHits maps sequence ids to lists of hits carrying a genomeId.
        A sequence contributes 1 to every distinct genome among its hits.
        The scores come back as a list of GenomeScore(taxId, score) records,
        handed to maxbio.sortList (key "score") before being returned.
        """
        tally = {}
        for seqId, seqHits in indexedHits.iteritems():
            distinctGenomes = set()
            for hit in seqHits:
                distinctGenomes.add(hit.genomeId)
            for genomeId in distinctGenomes:
                if genomeId not in tally:
                    tally[genomeId] = 0
                tally[genomeId] += 1

        GenomeScore = namedtuple.namedtuple("GenomeScore", "taxId, score")
        ranked = []
        for pair in tally.items():
            ranked.append(GenomeScore(*pair))

        # sort, only to make the debug message easier to read
        maxbio.sortList(ranked, "score")
        logger.debug("genomeScores %s" % str(ranked))
        return ranked
Ejemplo n.º 12
0
                          InvalidJobException,
                          InvalidJobTemplateException,
                          NoActiveSessionException,
                          NoDefaultContactStringSelectedException,
                          ReleaseInconsistentStateException,
                          ResumeInconsistentStateException,
                          SuspendInconsistentStateException,
                          TryLaterException,
                          UnsupportedAttributeException,
                          InvalidArgumentException,
                          InvalidAttributeValueException,
                          OutOfMemoryException,)

# Expose _h.Version under this module's namespace.
Version = _h.Version
# Named tuple with the fields, in order: jobId, hasExited, hasSignal,
# terminatedSignal, hasCoreDump, wasAborted, resourceUsage.
JobInfo = _nt.namedtuple("JobInfo",
                         """jobId hasExited hasSignal terminatedSignal
                            hasCoreDump wasAborted resourceUsage""")
# FileTransferMode = _nt.namedtuple("FileTransferMode",
#                                   """transferInputStream transferOutputStream
#                                      transferErrorStream""")

class JobTemplate(object):
    """A job to be submitted to the DRM."""

    HOME_DIRECTORY = '$drmaa_hd_ph$'
    """Home directory placeholder."""
    WORKING_DIRECTORY = '$drmaa_wd_ph$'
    """Working directory placeholder."""
    PARAMETRIC_INDEX = '$drmaa_incr_ph$'
    """Parametric index (for job arrays / bulk jobs) placeholder."""
Ejemplo n.º 13
0
        start = 0
        buf = ""
        for i in sorted(self.scratch):
            if not buf: start = i
            buf += self.scratch[i]
            if i + 1 not in self.scratch:
                data[start] = buf
                buf = ""
        return data

    def dump_scratch(self):
        for start, buf in self.get_scratch().items():
            print hex(start), ":", buf.encode('hex')


# Named tuple describing one segment; fields in order: selector, base,
# limit, flags.
Segment = namedtuple("Segment", "selector base limit flags")


def seg_base(e1, e2):
    """Assemble a segment base address from the two descriptor words.

    The result ORs together e1 shifted right by 16, the low byte of e2
    moved up to bits 16-23, and the top byte of e2 left in bits 24-31.
    """
    low = e1 >> 16
    middle = (e2 & 0xff) << 16
    high = e2 & 0xff000000
    return low | middle | high


def seg_limit(e1, e2):
    """Extract the segment limit from the two descriptor words.

    The raw limit is the low 16 bits of e1 combined with bits 16-19 of e2.
    When e2 has DESC_G_MASK set, the raw limit is shifted left by 12 and the
    low 12 bits are filled with ones.
    """
    low = e1 & 0xffff
    high = e2 & 0x000f0000
    raw = low | high
    if not (e2 & DESC_G_MASK):
        return raw
    return (raw << 12) | 0xfff


def load_seg(mem, selector, GDT, LDT):
    selector &= 0xffff
Ejemplo n.º 14
0
            return True
        else:
            return False


class SessionStringAttribute(object):
    """Descriptor whose value is a string filled in by a wrapped function.

    On every read a fresh 1024-byte buffer is created, the stored function
    is invoked through c() with that buffer and its size, and the buffer's
    value is returned.
    """

    def __init__(self, drmaa_function):
        # function that c() will invoke with (buffer, buffer size)
        self._f = drmaa_function

    def __get__(self, *args):
        capacity = 1024
        out = _ct.create_string_buffer(capacity)
        out_size = _ct.sizeof(out)
        c(self._f, out, out_size)
        return out.value


# (major, minor) named tuple; str() renders it as "<major>.<minor>".
Version = _nt.namedtuple("Version", "major minor")
Version.__str__ = lambda x: "%s.%s" % (x.major, x.minor)
#Version.__doc__ = """\
#An object representing the DRMAA version.
#
#major and minor attributes are int. For DRMAA 1.0, major == 1 and minor == 0.
#"""


class SessionVersionAttribute(object):
    """Descriptor that reads a Version through drmaa_version.

    Each read passes two unsigned ints by reference to drmaa_version via c()
    and returns their values as Version(major, minor).
    """

    def __get__(self, *args):
        major, minor = _ct.c_uint(10), _ct.c_uint(10)
        outputs = (_ct.byref(major), _ct.byref(minor))
        c(drmaa_version, *outputs)
        return Version(major.value, minor.value)
Ejemplo n.º 15
0
import threading
import traceback
import namedtuple
import commands
import functionmapper


# Named tuple with the fields, in order: name, tokens, full, actor.
CommandArgs = namedtuple.namedtuple('CommandArgs', 'name tokens full actor')


class Singleton(type):
    """Metaclass that creates at most one instance per class.

    The first call to a class builds the instance with the given arguments
    and stores it; every later call returns that stored instance and ignores
    its arguments.
    """

    # class -> its single instance, shared by all classes using this metaclass
    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        instance = super(Singleton, cls).__call__(*args, **kwargs)
        registry[cls] = instance
        return instance


class CommandParser(object):

    __metaclass__ = Singleton
    __slots__ = ("queue", "dispatcher", "parsing", "event")

    def __init__(self):
        print "  CommandParser: Creating and launching the Dispatcher"
        self.queue = []
        self.dispatcher = Dispatcher()
        self.dispatcher.start()

    def parseLine(self, line, entity):
Ejemplo n.º 16
0
import codecs
import random
from collections import defaultdict
from namedtuple import namedtuple

# Named tuple with the fields, in order: position, value.
Link = namedtuple('Link', ['position', 'value'])


class BigramChain(defaultdict):
    """A dictionary storing the next possible value, given a list of input sequences."""
    def __init__(self,
                 plugin_module,
                 data=None,
                 encoding='utf-8',
                 size=100,
                 cutoff=1,
                 token=False):
        defaultdict.__init__(self, dict)
        self.plugin_module = plugin_module
        try:
            self.hidden_sequence = self.plugin_module.hidden_sequence
        except AttributeError:
            self.hidden_sequence = False
        if data != None:
            self.load(data, size=size, cutoff=cutoff, token=token)
        self.startkeys = []
        self.status = {'message': '', 'progress': 0}
        self.subscribers = []
        self.limit_frequencies = {}

    def set_status(self, message, progress):
Ejemplo n.º 17
0
                          InvalidJobException,
                          InvalidJobTemplateException,
                          NoActiveSessionException,
                          NoDefaultContactStringSelectedException,
                          ReleaseInconsistentStateException,
                          ResumeInconsistentStateException,
                          SuspendInconsistentStateException,
                          TryLaterException,
                          UnsupportedAttributeException,
                          InvalidArgumentException,
                          InvalidAttributeValueException,
                          OutOfMemoryException,)

# Expose _h.Version under this module's namespace.
Version = _h.Version
# Named tuple with the fields, in order: jobId, hasExited, hasSignal,
# terminatedSignal, hasCoreDump, wasAborted, exitStatus, resourceUsage.
JobInfo = _nt.namedtuple("JobInfo",
                         """jobId hasExited hasSignal terminatedSignal
                            hasCoreDump wasAborted exitStatus resourceUsage""")
# FileTransferMode = _nt.namedtuple("FileTransferMode",
#                                   """transferInputStream transferOutputStream
#                                      transferErrorStream""")

class JobTemplate(object):
    """A job to be submitted to the DRM."""

    HOME_DIRECTORY = '$drmaa_hd_ph$'
    """Home directory placeholder."""
    WORKING_DIRECTORY = '$drmaa_wd_ph$'
    """Working directory placeholder."""
    PARAMETRIC_INDEX = '$drmaa_incr_ph$'
    """Parametric index (for job arrays / bulk jobs) placeholder."""
Ejemplo n.º 18
0
    def __init__(self, fileObj, headers=None, fileType=None, types=None, colCount=None):
        """Work out column headers and column types, then build self.Record.

        Headers are taken, in order of precedence, from:
        - the headers parameter, when given;
        - the first line of fileObj (tab-separated, surrounding "#"
          stripped), when no fileType is given either;
        - the predefined layout named by fileType: numbered / numColumns,
          psl, blastm8, intmap, blastConvert, bed4, bed3 or sam.

        A predefined layout also sets self.types (overriding the types
        parameter); "sam" additionally switches the comment character to
        "@".  An unknown fileType is logged and ends the process with exit
        status 1.  If no types end up set, every column is StringType.
        """
        self.types = types
        if fileObj:
            self.fileObj = fileObj
        self.commentChar = "#"
        self.line1 = None

        if headers:
            self.headers = headers
        elif not fileType:
            # header line comes from the file itself; types default to string
            firstLine = fileObj.readline().strip("\n").strip("#")
            self.headers = firstLine.split("\t")
        elif fileType == "numbered" or fileType == "numColumns":
            if colCount is None:
                # no column count given: infer it from the first data line
                self.line1 = fileObj.readline()
                colCount = len(self.line1.split("\t"))
            self.headers = ["col%d" % i for i in range(int(colCount))]
            self.types = [StringType] * len(self.headers)
        else:
            # predefined layouts: (fileType, comment char, [(column, type)])
            layouts = [
                ("psl", "#", [
                    ("score", IntType),
                    ("misMatches", IntType),
                    ("repMatches", IntType),
                    ("nCount", IntType),
                    ("qNumInsert", IntType),
                    ("qBaseInsert", IntType),
                    ("tNumInsert", IntType),
                    ("tBaseInsert", IntType),
                    ("strand", StringType),
                    ("qName", StringType),
                    ("qSize", IntType),
                    ("qStart", IntType),
                    ("qEnd", IntType),
                    ("tName", StringType),
                    ("tSize", IntType),
                    ("tStart", IntType),
                    ("tEnd", IntType),
                    ("blockCount", IntType),
                    ("blockSizes", StringType),
                    ("qStarts", StringType),
                    ("tStarts", StringType),
                ]),
                ("blastm8", "#", [
                    ("qName", StringType),
                    ("tName", StringType),
                    ("percIdentity", FloatType),
                    ("alnLength", IntType),
                    ("misMatches", IntType),
                    ("gapOpenCount", IntType),
                    ("qStart", IntType),
                    ("qEnd", IntType),
                    ("tStart", IntType),
                    ("tEnd", IntType),
                    ("eVal", FloatType),
                    ("score", IntType),
                ]),
                ("intmap", "#", [
                    ("int", IntType),
                    ("string", StringType),
                ]),
                ("blastConvert", "#", [
                    ("pmcId", IntType),
                    ("genomeId", IntType),
                    ("seqId", IntType),
                    ("chrom", StringType),
                    ("tStart", IntType),
                    ("tEnd", IntType),
                    ("score", FloatType),
                ]),
                ("bed4", "#", [
                    ("chrom", StringType),
                    ("start", IntType),
                    ("end", IntType),
                    ("name", StringType),
                ]),
                ("bed3", "#", [
                    ("chrom", StringType),
                    ("start", IntType),
                    ("end", IntType),
                ]),
                ("sam", "@", [
                    ("qname", StringType),
                    ("flag", IntType),
                    ("rname", StringType),
                    ("pos", IntType),
                    ("mapq", IntType),
                    ("cigar", StringType),
                    ("nrnm", StringType),
                    ("mpos", IntType),
                    ("isize", IntType),
                    ("seq", StringType),
                    ("qual", StringType),
                    ("tags", StringType),
                ]),
            ]
            chosen = [layout for layout in layouts if layout[0] == fileType]
            if not chosen:
                logging.error("maxTables.py: illegal fileType\n")
                sys.exit(1)
            _, commentChar, columns = chosen[0]
            self.headers = [colName for colName, _ in columns]
            self.types = [colType for _, colType in columns]
            self.commentChar = commentChar

        if not self.types:
            self.types = [StringType] * len(self.headers)

        # namedtuple here is a backport from python2.6
        self.Record = namedtuple.namedtuple("tuple", self.headers)
Ejemplo n.º 19
0
def systematics(samplecollection, hname, selection='', sysfnlist=None, stats=True, normonly=True, usecounts=True):
    '''
       Build the uncertainty bands for histogram hname of samplecollection.

       Collections with systematic variations are read from sysfnlist; each
       entry is unpacked as (idefn, (up, dn)), where up/dn are the paired
       .pkl.tar files of one uncertainty source and either may be None.
       NOTE(review): idefn is compared with 0 as if it were a running index;
       confirm callers pass (index, (up, dn)) tuples rather than bare pairs.

       selection and usecounts are accepted but not used in this function.

       Returns (hgr, rgr), two ROOT.TGraphAsymmErrors: the band around the
       total yield and the band for the ratio panel.

       Raises NotImplementedError when normonly is false.
    '''

    def geterrs(h):
        # bin errors of h, bins 1..N
        return [h.GetBinError(ib + 1) for ib in xrange(h.GetNbinsX())]

    def getbins(h, scale):
        # bin contents of h, bins 1..N, each multiplied by (1 + scale)
        return [h.GetBinContent(ib + 1) * (1 + scale) for ib in xrange(h.GetNbinsX())]

    nbins = samplecollection.get_samples()[0][hname].GetNbinsX()

    totalyield = utils.rlist([0.] * nbins, norm=1) # << total yield in each bin

    totalstats = utils.rlist([0.] * nbins, norm=2) # << statistical uncertainties
    totalsysl = utils.rlist([0.] * nbins, norm=2) # << overall downwards fluctuations
    totalsysh = utils.rlist([0.] * nbins, norm=2) # << overall upwards fluctuations

    pair_t = namedtuple('pair_t', 'up dn')

    if sysfnlist is not None:
        # per-source summed |variation - nominal|, keyed by systematics file.
        # Fixed: the code below always used `diff`, which was never defined
        # (only the unused diff_dn / diff_up were), so it raised NameError.
        diff = {}

        for idefn, defn in sysfnlist:

            sysfn = pair_t(*(defn)) # expect systematics to come in up/down pairs

            dummy_up = None
            dummy_dn = None

            if sysfn.up is not None:
                dummy_up = sample.samplelist([], samplecollection.get_cfg(), name = 'dummy_up', fromfile = sysfn.up)
                dummy_up.log.setLevel('warning')
            if sysfn.dn is not None:
                dummy_dn = sample.samplelist([], samplecollection.get_cfg(), name = 'dummy_dn', fromfile = sysfn.dn)
                dummy_dn.log.setLevel('warning')

            diff[sysfn.dn] = utils.rlist([ 0. for ib in xrange(nbins) ], norm = 1)
            diff[sysfn.up] = utils.rlist([ 0. for ib in xrange(nbins) ], norm = 1)

            for snom in samplecollection:
                n = snom.get_name()
                # Fixed: was `s.get_stype()`, but no `s` exists in this scope.
                if snom.get_stype() != bkgtype:
                    continue
                if idefn == 0:
                    # nominal yields and stat errors are accumulated only once
                    totalstats += geterrs(snom[hname])
                    # Fixed: getbins requires a scale; 0. leaves the contents as is.
                    totalyield += getbins(snom[hname], 0.)
                for icoll, coll in enumerate((dummy_up, dummy_dn)):
                    # Fixed: test for a missing collection before calling into it.
                    if coll is None:
                        continue
                    sysn = coll.get_source()
                    if snom not in coll:
                        __log.debug('sample [%s] not in systematics file [%s]'%(n, sysn))
                        continue
                    nombins = getbins(snom[hname], 0.)
                    varbins = getbins(coll[n][hname], 0.)
                    # NOTE(review): assumes get_source() returns the same value
                    # that was used as the diff key (sysfn.up / sysfn.dn) - confirm.
                    diff[sysn] += [ abs(x - y) for x, y in zip(varbins, nombins) ] # variations for each sample added linearly

            if sum(diff[sysfn.dn]) < 0: totalsysl += diff[sysfn.dn]
            else:                       totalsysh += diff[sysfn.dn]

            if sum(diff[sysfn.up]) < 0: totalsysl += diff[sysfn.up]
            else:                       totalsysh += diff[sysfn.up] # add uncertainty sources in quadrature

            # Fixed: either collection may be None when its file was not given.
            if dummy_up is not None:
                dummy_up.cleanup()
            if dummy_dn is not None:
                dummy_dn.cleanup()

    # now the total up/down uncertainties for all samples are calculated
    # along with the summed MC statistical uncertainty (bin-by-bin)

    # define the error bands for the total and for the ratio histograms

    h_bkg, s_bkg = sumhistograms([smpl[hname] for smpl in samplecollection.get_bkg_samples()], 'buffBkg')
    h_dat, s_dat = sumhistograms([smpl[hname] for smpl in samplecollection.get_dat_samples()], 'buffDat')

    hgr = ROOT.TGraphAsymmErrors()
    rgr = ROOT.TGraphAsymmErrors()

    if normonly:
        nnom = sum(totalyield)
        scale_up = sum(totalsysh) / float(nnom)
        scale_dn = sum(totalsysl) / float(nnom)
        totalsysl = utils.rlist(getbins(h_bkg, -scale_dn), norm=2)
        totalsysh = utils.rlist(getbins(h_bkg,  scale_up), norm=2)
        if stats:
            totalsysl += totalstats
            totalsysh += totalstats
    else:
        # Fixed: `raise NotImplemented` raises a TypeError, since
        # NotImplemented is a constant and not an exception class.
        raise NotImplementedError('systematics: only normonly=True is implemented')

    for ib in xrange(1, nbins + 1):
        xl = h_bkg.GetBinLowEdge(ib)
        xh = h_bkg.GetBinLowEdge(ib + 1)
        halfwidth = (xh - xl) / 2.0
        center = xl + halfwidth
        hgr.SetPoint(ib - 1, center, totalyield[ib-1])
        hgr.SetPointError(ib - 1, halfwidth, halfwidth, totalsysl[ib-1], totalsysh[ib-1])
        if totalyield[ib-1] != 0.:
            rerrup = abs(totalsysh[ib-1] / totalyield[ib-1])
            rerrdn = abs(totalsysl[ib-1] / totalyield[ib-1])
        else:
            # empty bin: no relative uncertainty can be formed
            rerrup = 0.
            rerrdn = 0.
        rgr.SetPoint(ib - 1, center, 1.)
        rgr.SetPointError(ib - 1, halfwidth, halfwidth, rerrdn, rerrup)
    hgr.SetFillStyle(3254)
    hgr.SetFillColor(ROOT.kGray + 1)
    hgr.SetLineColor(ROOT.kBlue)
    hgr.SetLineWidth(2)
    hgr.SetLineStyle(1)
    rgr.SetFillStyle(1001)
    rgr.SetFillColor(ROOT.kYellow)

    s_bkg.Delete()
    s_dat.Delete()
    h_bkg.Delete()
    h_dat.Delete()
    return hgr, rgr
Ejemplo n.º 20
0
def ratio(num, den, canvas=None, logy=False, logx=False,
          scale=globalcfg.ratioYScale, ymin=-0.1e-2, ymax=-1.0,
          xtitle='', ytitle='', rtitle='ratio', fontsize = globalcfg.labelFontSize,
          showhtest=False, xhtest = globalcfg.labelX, yhtest = globalcfg.labelY,
          showoutliers=True, nlabel=None, dlabel=None, ropts='E', nopts=None, dopts=None,
          sysband=None, **kwargs):
    '''
    Draw num and den in the top pad and their ratio num / den in the bottom pad.

    When canvas is None a new two-pad canvas is created; otherwise the given
    canvas is expected to have its two sub-pads defined already.  A negative
    ymin / ymax means the top-pad limit is derived from the histograms, and
    a non-empty scale fixes the (min, max) of the ratio pad.

    Returns a named tuple (ratiohist, ratiograph, top, bottom, canvas).
    '''

    def extreme(hist, highest):
        # content of the fullest (highest=True) or emptiest bin of hist
        if highest:
            return hist.GetBinContent(hist.GetMaximumBin())
        return hist.GetBinContent(hist.GetMinimumBin())

    def outlier_arrow(xpos, ytail, yhead):
        # red arrow at xpos flagging a ratio point outside the pad range
        marker = ROOT.TArrow(xpos, ytail, xpos, yhead)
        marker.SetLineColor(ROOT.kRed)
        marker.SetFillColor(ROOT.kRed)
        marker.SetLineWidth(4)
        marker.SetArrowSize(0.010)
        marker.SetOption("|>")
        marker.Draw()
        return marker

    if xtitle == '':
        xtitle = num.xtitle

    # the ratio histogram is built from temporary copies of num and den
    name = 'ratio|%s%s'%(num.GetName(), den.GetName())
    hnum = num('temp-hnum')
    hden = den('temp-hden')
    rhist = hnum(name)
    rhist.Divide(hden)
    hnum.Delete()
    hden.Delete()

    nbins = num.GetNbinsX()
    xmin = num.GetBinLowEdge(1)
    xmax = num.GetBinLowEdge(nbins) + num.GetBinWidth(nbins)

    # top-pad range: negative limits are placeholders for "derive from data"
    ymina, ymaxa = ymin, ymax
    if ymina < 0:
        ymina = max(min(abs(extreme(num, False)), abs(extreme(den, False))), 0.01)
    if ymaxa < 0:
        ymaxa = max(abs(extreme(num, True)), abs(extreme(den, True))) / 0.75

    # bottom-pad range: an explicit scale wins over the ratio's own extremes
    yminb, ymaxb = rhist.GetMinimum() / 1.25, rhist.GetMaximum() / 0.75
    if len(scale) > 0:
        yminb, ymaxb = scale

    # NOTE(review): canvas and sysband keep their equality tests against None
    # in case they are proxy objects that compare equal to None - confirm
    # before switching to identity checks.
    if canvas == None:
        if logy:
            ymaxa = pow(10., math.log(abs(ymaxa), 10) / 0.5)
            ymina = max(abs(ymina), abs(ymin))
        canvas = plotter(num.GetName() + '_' + den.GetName())
        canvas = doublepad(canvas, xmin, xmax, ymina, ymaxa, yminb, ymaxb,
                           setLogX = logx, setLogY = logy, gridY = True,
                           xtitle = xtitle, ytitlea = ytitle, ytitleb = rtitle,
                           fontsize = fontsize)

    ptop = canvas.cd(1) # << top pad
    if nopts is not None:
        num.Draw('A' + nopts + 'SAMES')
    if dopts is not None:
        den.Draw('A' + dopts + 'SAMES')

    pbot = canvas.cd(2) # << bottom pad
    style.applystyle(rhist, style.defaults['hist'])
    if ropts is not None:
        rhist.Draw('A' + ropts + 'SAME')
    rhist.SetLineWidth(2)
    rhist.GetYaxis().SetRangeUser(yminb - 0.1, ymaxb + 0.1)
    if sysband != None:
        sysband.Draw('E2')
        canvas += sysband
    graph = asgraph(rhist)
    if ropts is not None:
        # redrawn so the ratio points sit on top of the systematics band
        rhist.Draw('A' + ropts + 'SAME')
        graph.Draw('0')
    graph.SetLineWidth(2)

    if showoutliers:
        for ibin in xrange(1, rhist.GetNbinsX() + 1):
            content = rhist.GetBinContent(ibin)
            if content > ymaxb:
                canvas += outlier_arrow(rhist.GetBinCenter(ibin),
                                        ymaxb - 0.25, ymaxb - 0.05)
            if content < yminb:
                canvas += outlier_arrow(rhist.GetBinCenter(ibin),
                                        yminb + 0.25, yminb + 0.05)
    canvas += graph

    # reference line at ratio == 1
    unity = ROOT.TLine(xmin, 1, xmax, 1)
    unity.SetLineColor(style.colors['black'])
    unity.SetLineWidth(3)
    unity.Draw()
    canvas += unity
    canvas.cd(1) # << top pad

    if showhtest:
        nbins = num.GetNbinsX()
        chi_sqr = num.Chi2Test(den, "WW CHI2")
        ks      = num.KolmogorovTest(den)
        summary = '#chi^{2}/N = %0.1f/%d,   KS = %0.2g '%(chi_sqr, nbins, ks)
        canvas += atlaslabel.label(xhtest, yhtest, summary,
                                   size = fontsize, angle = globalcfg.labelA)
        canvas.ks = ks

    if dlabel is not None and nlabel is not None:
        legend_bottom = globalcfg.legendY - 3.0 * globalcfg.legendFontSize
        legend_right = globalcfg.legendX + globalcfg.legendW
        leg = ROOT.TLegend(globalcfg.legendX, legend_bottom,
                           legend_right, globalcfg.legendY)
        leg.AddEntry(num, nlabel, 'lpf')
        leg.AddEntry(den, dlabel, 'lpf')
        leg.SetFillColor(0)
        leg.Draw('SAME')
        canvas += leg

    result_t = namedtuple('tuple', 'ratiohist ratiograph top bottom canvas')
    return result_t(rhist, graph, ptop, pbot, canvas)
Ejemplo n.º 21
0
    def __init__(self,
                 fileObj,
                 headers=None,
                 fileType=None,
                 types=None,
                 colCount=None):
        """Determine column headers and types, then create self.Record.

        Header sources, highest precedence first:
        - the headers parameter;
        - the first line of fileObj, split on tabs with surrounding "#"
          removed, when neither headers nor fileType is given;
        - a predefined layout selected by fileType, one of: numbered /
          numColumns, psl, blastm8, intmap, blastConvert, bed4, bed3, sam.

        Predefined layouts set self.types as well, replacing the types
        parameter, and "sam" changes the comment character to "@".  Any
        other fileType is logged as an error and the process exits with
        status 1.  Columns default to StringType when no types are set.
        """
        self.types = types
        if fileObj:
            self.fileObj = fileObj
        self.commentChar = "#"
        self.line1 = None

        if headers:
            self.headers = headers
        elif not fileType:
            # take the header line from the file; all columns stay strings
            headerLine = fileObj.readline().strip("\n").strip("#")
            self.headers = headerLine.split("\t")
        elif fileType == "numbered" or fileType == "numColumns":
            if colCount is None:
                # column count not supplied: count the fields of line one
                self.line1 = fileObj.readline()
                colCount = len(self.line1.split("\t"))
            self.headers = ["col%d" % i for i in range(int(colCount))]
            self.types = [StringType] * len(self.headers)
        else:
            # each predefined layout: (fileType, comment char, [(column, type)])
            layouts = [
                ("psl", "#", [
                    ("score", IntType),
                    ("misMatches", IntType),
                    ("repMatches", IntType),
                    ("nCount", IntType),
                    ("qNumInsert", IntType),
                    ("qBaseInsert", IntType),
                    ("tNumInsert", IntType),
                    ("tBaseInsert", IntType),
                    ("strand", StringType),
                    ("qName", StringType),
                    ("qSize", IntType),
                    ("qStart", IntType),
                    ("qEnd", IntType),
                    ("tName", StringType),
                    ("tSize", IntType),
                    ("tStart", IntType),
                    ("tEnd", IntType),
                    ("blockCount", IntType),
                    ("blockSizes", StringType),
                    ("qStarts", StringType),
                    ("tStarts", StringType),
                ]),
                ("blastm8", "#", [
                    ("qName", StringType),
                    ("tName", StringType),
                    ("percIdentity", FloatType),
                    ("alnLength", IntType),
                    ("misMatches", IntType),
                    ("gapOpenCount", IntType),
                    ("qStart", IntType),
                    ("qEnd", IntType),
                    ("tStart", IntType),
                    ("tEnd", IntType),
                    ("eVal", FloatType),
                    ("score", IntType),
                ]),
                ("intmap", "#", [
                    ("int", IntType),
                    ("string", StringType),
                ]),
                ("blastConvert", "#", [
                    ("pmcId", IntType),
                    ("genomeId", IntType),
                    ("seqId", IntType),
                    ("chrom", StringType),
                    ("tStart", IntType),
                    ("tEnd", IntType),
                    ("score", FloatType),
                ]),
                ("bed4", "#", [
                    ("chrom", StringType),
                    ("start", IntType),
                    ("end", IntType),
                    ("name", StringType),
                ]),
                ("bed3", "#", [
                    ("chrom", StringType),
                    ("start", IntType),
                    ("end", IntType),
                ]),
                ("sam", "@", [
                    ("qname", StringType),
                    ("flag", IntType),
                    ("rname", StringType),
                    ("pos", IntType),
                    ("mapq", IntType),
                    ("cigar", StringType),
                    ("nrnm", StringType),
                    ("mpos", IntType),
                    ("isize", IntType),
                    ("seq", StringType),
                    ("qual", StringType),
                    ("tags", StringType),
                ]),
            ]
            matching = [layout for layout in layouts if layout[0] == fileType]
            if not matching:
                logging.error("maxTables.py: illegal fileType\n")
                sys.exit(1)
            _, commentChar, columns = matching[0]
            self.headers = [colName for colName, _ in columns]
            self.types = [colType for _, colType in columns]
            self.commentChar = commentChar

        if not self.types:
            self.types = [StringType] * len(self.headers)

        # namedtuple here is a backport from python2.6
        self.Record = namedtuple.namedtuple("tuple", self.headers)
Ejemplo n.º 22
0
 def _defineRowType(self):
     """Set self.RowType to a "MysqlRow" named tuple, one field per cursor column.

     Field names are the first element of each entry in
     self.cursor.description.
     """
     colNames = []
     for desc in self.cursor.description:
         colNames.append(desc[0])
     self.RowType = namedtuple.namedtuple("MysqlRow", colNames)
Ejemplo n.º 23
0
def create_nuple2():
  """Build the MyTuple type and instantiate it once with v; both results are discarded.

  NOTE(review): v is not defined in this function, so it has to exist at
  module level; presumably this is a timing target - confirm against caller.
  """
  MyTuple=namedtuple("one two three")
  MyTuple(v)
Ejemplo n.º 24
0
    """ write tuples to tabsep file in format (genomeId, chrom, start, end, pmcId, seqId, score, chainId) """
    for f in features:
        data = [f.genomeId, f.chrom, f.start, f.end, pmcId, f.seqId, f.score, f.chainId]
        data = [str(e) for e in data]
        file.write("\t".join(data)+"\n")

def removeUnivec(bestHits):
    """ Remove every hit whose sequence also has a hit with genomeId==0.

    A seqId that has at least one hit with genomeId==0 is treated as noise
    (presumably vector/univec contamination, going by the function name) and
    ALL hits of that seqId are dropped, not only the genomeId==0 ones.

    bestHits: iterable of records with .seqId and .genomeId attributes.
    Returns a new list; bestHits itself is not modified.
    """
    noiseSeqIds = set(bh.seqId for bh in bestHits if bh.genomeId==0)
    result = [bh for bh in bestHits if bh.seqId not in noiseSeqIds]
    # report how many hits survived the filter (the input size was logged before)
    logger.debug("%d non-univec hits" % (len(result)))
    if len(result)==0:
        logger.debug("No hits left, skipping this document")
    return result

# Record type for a chained feature; seqIds and chainId are the fields added on
# top of the per-hit fields (genomeId, chrom, start, end, seqId, score).
# NOTE(review): the typename is "chainedFeature" (lower-case c), unlike the variable name.
ChainedFeature = namedtuple.namedtuple("chainedFeature", "genomeId, chrom, start, end, seqId, score, seqIds, chainId")

def chainHitsGenes(hits):
    """ index by geneId, convert to "chainedFeatures" (the chain is the just
    the gene here) and set the seqIds attribute to all seqids that match a
    particular gene """ 
    
    # index by gene
    logger.debug("%d unchained gene hits" % len(hits))
    geneFeatures = {}
    for f in hits:
        gene = f.chrom.split("|")[1]
        geneFeatures.setdefault(gene, []).append( f)

    # convert to chained features: chrom=transcript, chainId=gene
    chainedFeatures = {}
Ejemplo n.º 25
0
                self.cache[doi] = pmid
                return pmid


def filenameToId(fname):
    """ Turn a file path into its integer document id.

    The directory part and the extension are dropped, every "PMC" substring is
    removed from what is left, and the remainder is converted with int()
    (so a non-numeric remainder raises ValueError).
    """
    stem, _extension = os.path.splitext(os.path.basename(fname))
    digits = stem.replace("PMC", "")
    return int(digits)


# ==== SEQUENCE EXTRACTION =====

#  ... from directory

# Record with three fields: docId, location and nucleotideOccurrences.
DocumentSequences = namedtuple.namedtuple(
    "DocumentSequences", "docId, location, nucleotideOccurrences")


def generateDocumentSequences(folder):
    """ An iterator. Parses a folder with .txt and .xml files, but prefers txt files. Yields DocumentSequences"""

    # little helper
    def findSubdirFiles(baseDir, extensions):
        """ Generator: traverse a baseDir and all subdirectories to find all files with certain extensions, extension is dot plus the extension, like ".xml"  """
        for root, dirs, files in os.walk(baseDir):
            for f in files:
                if os.path.splitext(f)[1] in extensions:
                    yield os.path.join(root, f)

    # get list of all basenames in this dir
    filenames = list(findSubdirFiles(folder, ['.xml', '.txt']))
Ejemplo n.º 26
0
def stack(samplecollection, histoname, samplenames, opt = 'HIST',
          canvas = None, ipad=1, ylog = False, sigstacked = False, multisignal = globalcfg.multisignal, blinding=None,
          showbands = False, selection = '', sysfn = None, syscalc = systematics, bands = (None,None),
          showinset = False, xinset = 1, yinset = 1, lumi = -1,
          showhtest = '', xhtest = globalcfg.labelX, yhtest = globalcfg.labelY,
          fontsize = globalcfg.labelFontSize, legend = True, xtitle = '', ytitle = '', label = None,
          xmin = None, xmax = None, ymin = None, ymax = None):
    '''
    Draw background stack histograms, with (optionally) summed signal and data histograms overlaid

      samplecollection : object providing get_samples(); each sample provides get_name(),
                         get_stype() and indexing by histogram name
      histoname   : name of the histogram looked up in every selected sample
      samplenames : names of the samples to use; every name must exist in the collection
                    (a missing one raises KeyError in the lookup loop)
      opt         : draw option for the placeholder histogram, the stack and the signal
      canvas, ipad: canvas to draw on (a new plotter(histoname) if None) and the pad to cd() into
      ylog        : log-scale y axis (also changes the computed upper y limit)
      sigstacked  : add every background histogram to the signal so it is drawn on top of the stack
      multisignal : treat the signal histograms individually instead of using their sum
      blinding    : optional callable(csig, cbkg, ibin); a true result zeroes that data bin
      showbands, selection, sysfn, syscalc, bands :
                    uncertainty bands; if showbands is set and bands == (None, None) they are
                    computed with syscalc(samplecollection, histoname, selection, sysfn, stats=True)
      showinset, xinset, yinset : accepted but not used by this function (see the TODO below)
      lumi, label : forwarded to style.applyAtlasLabels
      showhtest   : if != '', print chi-square and KS test results; compared against
                    bkgtype / sigtype to choose background only or background + signal
      xhtest, yhtest, fontsize : position and size of the test label
      legend      : draw the sample legend
      xtitle, ytitle : axis titles; '' keeps the titles of the placeholder histogram
      xmin, xmax, ymin, ymax : explicit axis ranges, each applied only when BOTH ends are given

    Returns a namedtuple with the fields
    (can, stk_bkg, sum_bkg, stk_sig, sum_sig, stk_dat, sum_dat, total_err).

    NOTE(review): comparisons are written "== None" / "!= None" throughout; this is
    presumably deliberate for ROOT objects - confirm before changing them to "is None".
    '''

    def histmax(h):
        # content of the highest bin
        return h.GetBinContent(h.GetMaximumBin())

    __log.info('create stack for [%s]'%(histoname))

    #
    # first create the canvas and find the signal, background and data histograms
    # defined in the samplecollection
    #
    can = canvas
    if can == None:
        can = plotter(histoname)
    samplemap = {}
    for item in samplecollection.get_samples():
        if item.get_name() not in samplenames:
            continue
        samplemap[item.get_name()] = item
    bkg_histos = []
    sig_histos = []
    dat_histos = []
    # iterate samplenames (not samplemap) so the histograms keep the caller's order
    for key in samplenames:
        s = samplemap[key]
        if s.get_stype() == sigtype:
            sig_histos += [s[histoname]]
        if s.get_stype() == bkgtype:
            bkg_histos += [s[histoname]]
        if s.get_stype() == dattype:
            dat_histos += [s[histoname]]
    sum_bkg, stk_bkg = sumhistograms(bkg_histos, histoname + 'Bkg')
    sum_sig, stk_sig = sumhistograms(sig_histos, histoname + 'Sig')
    sum_dat, stk_dat = sumhistograms(dat_histos, histoname + 'Dat')

    #
    # use sum of background histograms as the placeholder histogram to set axis
    # scale, axis labels &c. on the canvas
    #

    main_hist = sum_bkg

    # no background sum available: build an empty placeholder with the signal's binning
    # NOTE(review): this path dereferences sum_sig, so it needs at least one signal sample
    if main_hist == None:
        main_hist = histogram.hist1D(histoname + 'FakeSummed', sum_sig.xtitle, sum_sig.nbinsx-1, sum_sig.binsx)
        main_hist.xtitle = sum_sig.xtitle
        main_hist.ytitle = sum_sig.ytitle
        stk_bkg = ROOT.THStack(histoname + 'FakeStack', histoname + 'FakeStack')
        stk_bkg.Add(main_hist)

    if sigstacked:
        if multisignal:
            # sigh() is called to obtain the object that is modified (presumably a copy - confirm)
            for isigh, sigh in enumerate(sig_histos):
                sig_histos[isigh] = sigh()
                for bkgh in bkg_histos:
                    sig_histos[isigh] += bkgh
        else:
            # sum_sig is modified in place here
            for bkgh in bkg_histos:
                sum_sig += bkgh

    #
    # define systematic & statistical uncertainty bands
    #

    herr, rerr = bands
    if showbands and bands == (None,None):
        herr, rerr = syscalc(samplecollection, histoname, selection, sysfn, stats=True)

    can.cd(ipad)

    #
    # calculate the x- and y-axis ranges so that stack and data overlay are
    # always visible
    #

    lmaxy, maxy = -1, -1
    # the bare excepts skip entries for which histmax() fails (e.g. a missing histogram)
    if multisignal:
        for h in (sig_histos + [ sum_bkg, sum_dat ]):
            try: maxy = max(histmax(h), maxy)
            except: pass
    else:
        for h in [ sum_bkg, sum_sig, sum_dat ]:
            try: maxy = max(histmax(h), maxy)
            except: pass
    if sigstacked:
        try:    maxy = max(histmax(sum_bkg) + histmax(sum_sig), maxy)
        except: pass
    # leave headroom above the highest bin
    lmaxy = maxy / 0.6
    if ylog:
        lmaxy = pow(10., math.log(lmaxy, 10) / 0.5)
    can += (herr, rerr)
    main_hist.Draw(opt)
    main_hist.GetYaxis().SetRangeUser(0.025, lmaxy)
    if ymin != None and ymax != None:
        main_hist.GetYaxis().SetRangeUser(ymin, ymax)
    if xmin != None and xmax != None:
        main_hist.GetXaxis().SetRangeUser(xmin, xmax)

    #
    # set the x- and y-axis titles
    #

    if ytitle != '': main_hist.GetYaxis().SetTitle(ytitle)
    else           : main_hist.GetYaxis().SetTitle(main_hist.ytitle)
    if xtitle != '': main_hist.GetXaxis().SetTitle(xtitle)
    else           : main_hist.GetXaxis().SetTitle(main_hist.xtitle)
    main_hist.GetYaxis().SetTitleOffset(1.1)


    # blinding: zero content and error of every data bin for which blinding() is true
    # (ROOT bins are 1-based, hence ibin+1; blinding() itself receives the 0-based index)
    # NOTE(review): sum_sig and sum_bkg are dereferenced without a None check here
    if blinding!=None and sum_dat != None:
        for ibin in xrange(sum_bkg.GetNbinsX()):
            csig = sum_sig.GetBinContent(ibin+1)
            cbkg = sum_bkg.GetBinContent(ibin+1)
            if blinding(csig, cbkg, ibin):
                if sum_dat != None:
                    sum_dat.SetBinContent(ibin+1, 0)
                    sum_dat.SetBinError(ibin+1, 0)

    stk_bkg.Draw(opt + 'SAME')
    if herr != None:
        herr.Draw("E2")
    can += sum_bkg
    can += stk_bkg
    if not multisignal:
        if sum_sig != None:
            can += sum_sig
            can += stk_sig
            sum_sig.Draw(opt + 'SAME')
    else:
        for hsig in sig_histos:
            hsig.Draw(opt + 'SAME')
    if sum_dat != None:
        can += sum_dat
        can += stk_dat
        sum_dat.Draw('ESAME')
    if ylog:
        can.SetLogy(True)

    #
    # perform statistical tests (if requested) comparing sum of backgrounds
    # (+ signal, optionally) to data overlay
    #

    if showhtest != '':
        nbins = sum_bkg.GetNbinsX()
        # -1 is what gets printed when no test could be run
        chi_sqr = -1
        ks      = -1
        if showhtest == bkgtype and sum_dat != None:
            chi_sqr = sum_bkg.Chi2Test(sum_dat, "WU CHI2")
            ks      = sum_bkg.KolmogorovTest(sum_dat)
        elif showhtest == sigtype and sum_dat != None and not multisignal:
            buff = sum_bkg + sum_sig
            chi_sqr = buff.Chi2Test(sum_dat, "WU CHI2")
            ks      = buff.KolmogorovTest(sum_dat)
            buff.Delete()
        can += atlaslabel.label(xhtest, yhtest,
                                '#chi^{2}/N = %0.1f/%d,   KS = %0.2g '%(chi_sqr, nbins, ks),
                                 size = fontsize, angle = globalcfg.labelA)

    #
    # create and draw the legend
    #
    if legend:
        # the legend grows downwards from legendY with the number of requested samples
        ylegbot = globalcfg.legendY - (globalcfg.legendFontSize/1.4 * len(samplenames))
        delta = 0 ## ...
        legitems = []
        if globalcfg.legendShowSumBkg and herr != None:
            if sysfn != None: legitems = [(herr, 'SM (sys #oplus stat)', 'lf')]
            else            : legitems = [(herr, 'SM (stat)', 'lf')]
        leg = samplelegend(samplecollection, histoname,
                            globalcfg.legendX, globalcfg.legendX + globalcfg.legendW,
                            delta + ylegbot, delta + globalcfg.legendY,
                            can, samplenames, legitems)
        can += leg

    #
    # add the ATLAS labels
    #
    can += style.applyAtlasLabels(can, lumi = lumi, sub=label)

    # @TODO apply inset zoom option

    # the result type is created on every call; its typename is the literal 'tuple'
    tupl = namedtuple('tuple', 'can stk_bkg sum_bkg stk_sig sum_sig stk_dat sum_dat total_err')
    return tupl(can, stk_bkg, sum_bkg, stk_sig, sum_sig, stk_dat, sum_dat, herr)
Ejemplo n.º 27
0
 def _defineRowType(self):
     """Create self.RowType, a namedtuple class ("MysqlRow") with one field per cursor column."""
     # the first element of every cursor.description entry is used as the field name
     fieldNames = []
     for columnDesc in self.cursor.description:
         fieldNames.append(columnDesc[0])
     self.RowType = namedtuple.namedtuple("MysqlRow", fieldNames)
Ejemplo n.º 28
0
def ratiostack(samplecollection, histoname, samplenames, opt = '', ylog = False,
               showbands = False, selection = '', sysfn = None, syscalc = systematics, bands = (None,None),
               showinset = False, xinset = 1, yinset = 1, lumi = -1,
               showhtest = '', xhtest = globalcfg.labelX, yhtest = globalcfg.labelY-0.2, showmconly=False, divideFirstBin = 0,
               showsoverb = False, fontsize = globalcfg.labelFontSizeLarge, legend = True, blinding=None,
               scale = globalcfg.ratioYScale, xtitle = '', ytitle = '', label = None,
               sigstacked=False, multisignal = globalcfg.multisignal, **kwargs):
    '''
    Draw background stack histograms, with summed signal and data histograms overlaid and show ratio of
    sum backgrounds / data or sum signal / sum backgrounds

      samplecollection : object providing get_samples(); each sample provides get_name(),
                         get_stype() and indexing by histogram name
      histoname   : name of the histogram looked up in every selected sample
      samplenames : names of the samples to use; every name must exist in the collection
      opt         : appended to the draw option of the summed background
      ylog        : log-scale y axis (also changes the computed upper y limit)
      showbands : show systematics bands on the stack and on the ratio, using selection in the sysfn
      bands     : (stack band, ratio band); computed with syscalc when showbands is set and one is missing
      showinset : if True, show an inset zoom canvas (not implemented yet, see the TODO below;
                  xinset and yinset are unused as well)
      showhtest : if != '', calculate chi-square and KS test for "bkg" or "sig" (= signal and background), w.r.t. data
      showmconly : do not use data; the ratio numerator is then built from signal (+ background)
      divideFirstBin : if neither 0 nor 1, content and error of the first bin of every selected
                       sample histogram are divided by this value IN PLACE; 0 (default) and 1 leave
                       the histograms untouched
      showsoverb : use the signal as ratio numerator, title the ratio 'S/B' and draw S/B in the lower pad
      blinding  : optional callable(csig, cbkg, ibin); a true result zeroes that data and ratio bin
      scale : the y-axis for the ratio pad
      xtitle, ytitle : axis titles; '' takes them from the summed background
      lumi, label : forwarded to style.applyAtlasLabels
      sigstacked, multisignal : same meaning as in stack()
      kwargs    : forwarded to ratio()

    Returns None (after logging an error) when the background sum, or the data sum without
    showmconly, is undefined; otherwise a namedtuple with the fields
    (can, top, bot, stk_bkg, sum_bkg, stk_sig, sum_sig, stk_dat, sum_dat, total_err, ratio_err, graph).

    NOTE(review): comparisons are written "== None" / "!= None" throughout; this is
    presumably deliberate for ROOT objects - confirm before changing them to "is None".
    '''
    __log.info('create ratio stack for [%s]'%(histoname))

    samplemap = {}
    for item in samplecollection.get_samples():
        if item.get_name() not in samplenames:
            continue
        samplemap[item.get_name()] = item
    bkg_histos = []
    sig_histos = []
    dat_histos = []
    for key in samplenames:
        s = samplemap[key]
        # 0 is the "disabled" default and 1 is a no-op; the previous test (!= 1)
        # divided by zero whenever the default was used
        if divideFirstBin not in (0, 1):
            firstbin = s[histoname].GetBinContent( 1 )
            firstbinerror = s[histoname].GetBinError( 1 )
            s[histoname].SetBinContent( 1, firstbin / divideFirstBin )
            s[histoname].SetBinError( 1, firstbinerror / divideFirstBin )

        if s.get_stype() == sigtype:
            sig_histos += [s[histoname]]
        if s.get_stype() == bkgtype:
            bkg_histos += [s[histoname]]
        if s.get_stype() == dattype:
            dat_histos += [s[histoname]]
    # NOTE(review): the name suffixes look swapped (background gets 'Sig', signal gets 'Bkg',
    # unlike stack()); kept as is because other code may look the objects up by name - confirm
    sum_bkg, stk_bkg = sumhistograms(bkg_histos, histoname + 'Sig')
    sum_sig, stk_sig = sumhistograms(sig_histos, histoname + 'Bkg')
    if showmconly:
        sum_dat, stk_dat = None, None
    else:
        sum_dat, stk_dat = sumhistograms(dat_histos, histoname + 'Dat')

    if sum_bkg == None or (sum_dat == None and not showmconly):
        __log.error('cannot calculate ratio if data and backgrounds in sample collection are undefined!')
        return None

    #
    # define uncertainty bands
    #

    herr, rerr = bands
    if showbands and (herr == None or rerr == None):
        herr, rerr = syscalc(samplecollection, histoname, selection, sysfn, stats = True)

    #
    # define the x- and y-axis ranges
    #

    def histmax(h):
        # content of the highest bin
        return h.GetBinContent(h.GetMaximumBin())

    lmaxy, maxy = -1, -1
    # the bare excepts skip entries for which histmax() fails (e.g. a missing histogram)
    for h in (sum_bkg, sum_sig, sum_dat):
        try:
            if histmax(h) > maxy: maxy = histmax(h)
        except:
            pass
    try:
        maxy = max(histmax(sum_bkg) + histmax(sum_sig), maxy)
        maxy = max(histmax(sum_dat), maxy)
    except:
        pass
    if not ylog:
        lmaxy = maxy * 1.25
    else:
        lmaxy = pow(10., math.log(maxy, 10) / 0.75)

    #
    # set the axis labels
    #

    if xtitle == '':  xtitle = sum_bkg.xtitle
    if ytitle == '':  ytitle = sum_bkg.ytitle

    # numerator of the ratio; calling a histogram is used to obtain the object
    # (presumably a copy - confirm)
    if showmconly:
        if showsoverb: sum_dat_cl = sum_sig()
        else:          sum_dat_cl = sum_sig() + sum_bkg()
    else:
        sum_dat_cl = sum_dat()

    #
    # build the ratio plot = data / sum backgrounds
    #
    # rtitle is only overridden for S/B; otherwise ratio() keeps its own default
    # (or a value the caller passed through kwargs). It used to be read
    # unconditionally, which raised UnboundLocalError whenever showsoverb was False.
    ratio_kwargs = dict(kwargs)
    if showsoverb:
        ratio_kwargs['rtitle'] = 'S/B'
    ratiohist, ratiograph, ptop, pbot, can = ratio(sum_dat_cl, sum_bkg, canvas = None, logy = ylog,
                                                   ymax = lmaxy, ymin = 0.025,
                                                   scale = scale, xtitle = xtitle, ytitle = ytitle,
                                                   sysband = rerr, **ratio_kwargs)

    #
    # offset the signal histograms if drawing on top of background stack
    #
    if sigstacked:
        if multisignal:
            for isigh, sigh in enumerate(sig_histos):
                sig_histos[isigh] = sigh()
                for bkgh in bkg_histos:
                    sig_histos[isigh] += bkgh
        else:
            for bkgh in bkg_histos:
                sum_sig += bkgh

    #
    # draw the background, ratio, signal and data overlays - with uncertainty bands (if requested)
    #

    # blinding: zero data and ratio for every bin for which blinding() is true
    # (ROOT bins are 1-based, hence ibin+1; blinding() itself receives the 0-based index)
    # NOTE(review): sum_sig is dereferenced without a None check here
    if blinding!=None:
        for ibin in xrange(sum_bkg.GetNbinsX()):
            csig = sum_sig.GetBinContent(ibin+1)
            cbkg = sum_bkg.GetBinContent(ibin+1)
            if blinding(csig, cbkg, ibin):
                if sum_dat != None:
                    sum_dat.SetBinContent(ibin+1, 0)
                    sum_dat.SetBinError(ibin+1, 0)
                ratiohist.SetBinContent(ibin+1, 0)
                ratiohist.SetBinError(ibin+1, 0)
                ratiograph.RemovePoint(ibin)

    can += ratiohist
    can += (herr, rerr)
    sum_bkg.Draw('AHISTSAME][' + opt)
    if herr != None:
        herr.Draw("E2")
    stk_bkg.Draw('HISTSAMES][')
    can += sum_bkg
    can += stk_bkg
    if sum_sig != None:
        can += sum_sig
        can += stk_sig
        if multisignal:
            for h in sig_histos:
                h_cl = h()
                h_cl.Draw('HISTSAMES')
                can += h_cl
        else:
            sum_sig.Draw('HISTSAMES')
    can += sum_dat
    can += stk_dat
    if sum_dat != None:
        sum_dat.Draw('ESAMES][')

    if ylog:
        can.SetLogy(True)

    #
    # calculate the statistical tests
    #

    if showhtest != '':
        nbins = sum_bkg.GetNbinsX()
        # -1 is what gets printed when no test could be run
        chi_sqr = -1
        ks      = -1
        if showhtest == bkgtype and sum_dat != None:
            chi_sqr = sum_bkg.Chi2Test(sum_dat, "WU CHI2")
            ks      = sum_bkg.KolmogorovTest(sum_dat)
        elif showhtest == sigtype and sum_dat != None:
            buff = sum_bkg + sum_sig
            chi_sqr = buff.Chi2Test(sum_dat, "WU CHI2")
            ks      = buff.KolmogorovTest(sum_dat)
            buff.Delete()
        ptop.cd()

        can += atlaslabel.label(xhtest, yhtest,
                                 '#chi^{2}/N = %0.1f/%d,   KS = %0.3g '%(chi_sqr, nbins, ks),
                                 size = fontsize, angle = globalcfg.labelA)
        ptop.cd()

    #
    # add the sample legend
    #

    if legend:
        delta = 0
        legitems = []
        if globalcfg.legendShowSumBkg and herr != None:
            if sysfn != None: legitems = [(herr, 'SM (sys #oplus stat)', 'lf')]
            else            : legitems = [(herr, 'SM (stat)', 'lf')]
        # the legend grows downwards from legendY with the number of requested samples
        leg = samplelegend(samplecollection, histoname,
                           globalcfg.legendX, globalcfg.legendX + globalcfg.legendW,
                           delta + globalcfg.legendY - (globalcfg.legendFontSize * len(samplenames)), globalcfg.legendY + delta,
                           ptop, samplenames, legitems)
        can += leg
    xd = globalcfg.atlaslabelXSub - globalcfg.atlaslabelX
    yd = globalcfg.atlaslabelYSub - globalcfg.atlaslabelY
    can += style.applyAtlasLabels(ptop, xd = xd, yd = yd, size = globalcfg.labelFontSizeSub, lumi = lumi, sub=label)

    # @TODO apply inset zoom option

    #
    # include S / B plot in the ratio sub-pad
    #

    if showsoverb:
        pbot.cd()
        soverb = sum_sig(histoname + ':S/B')
        soverb.Divide(sum_bkg)
        soverb.Draw('ESAME')
        can += soverb
        ptop.cd()

    # the result type is created on every call; its typename is the literal 'tuple'
    tupl = namedtuple('tuple', 'can top bot stk_bkg sum_bkg stk_sig sum_sig stk_dat sum_dat total_err ratio_err graph')
    return tupl(can, ptop, pbot, stk_bkg, sum_bkg, stk_sig, sum_sig, stk_dat, sum_dat, herr, rerr, ratiograph)
Ejemplo n.º 29
0
        bestElements = bestScoreElements(idList, scoreField)
        filteredList.extend(bestElements)
    return filteredList


def removeBigSets(predDict, limit):
    """ given a dict with string -> set, return a new dict without the entries where len(set) >= limit

    predDict is not modified; the kept sets are the same objects, not copies.

    >>> removeBigSets({"a": set([1]), "b": set([1, 2, 3])}, 3)
    {'a': set([1])}
    """
    result = {}
    # iterate over (key, value) pairs; iterating the dict itself yields only the keys
    for key, predSet in predDict.items():
        if len(predSet) < limit:
            result[key] = predSet
    return result


# return types for benchmark()
# BenchmarkResult: counts (TP, FN, FP), derived scores (Prec, Recall, F), the list of
#   errors (errList) and the number of objects (objCount); its typename is "BenchResultRec".
# ErrorDetails: one entry of errList - an id with its expected and predicted values.
BenchmarkResult = namedtuple.namedtuple("BenchResultRec", "TP, FN, FP, Prec, Recall, F, errList, objCount")
ErrorDetails = namedtuple.namedtuple("ErrorDetails", "id, expected, predicted")


def benchmark(predDict, refDict):
    """ returns a class with attributes for TP, FN, FP and various other counts and information about prediction errors 
    >>> benchmark({"a" : set([1,2,3]), "b" : set([3,4,5])}, {"a":set([1]), "b":set([4])})
    BenchResultRec(TP=2, FN=0, FP=4, Prec=0.3333333333333333, Recall=1.0, F=0.5, errList=[ErrorDetails(id='a', expected=set([1]), predicted=set([1, 2, 3])), ErrorDetails(id='b', expected=set([4]), predicted=set([3, 4, 5]))], objCount=2)
    """
    OBJECTNAME = "documents"

    TP, FN, FP = 0, 0, 0
    objCount = 0
    atLeastOneHit = 0

    errDetails = []
Ejemplo n.º 30
0
import signal
import threading
import Queue

from namedtuple import namedtuple
from EventThread import EventThread

def clamp(a, low, high):
    """Limit a to the range [low, high]: the lower bound is applied first, then the upper one."""
    raised = max(a, low)
    return min(raised, high)

def roundBelowToZero(x, threshold):
    """Return 0 when the magnitude of x is at most threshold, otherwise x unchanged."""
    return 0 if abs(x) <= threshold else x

# Event record types for pad input (field names as given in each definition).
# NOTE(review): "left" in AxisEvent presumably tells the left stick from the right one - confirm.
ButtonEvent = namedtuple("ButtonEvent", "button down")
AxisEvent = namedtuple("AxisEvent", "left x y")
DPadEvent = namedtuple("DPadEvent", "x y")

class XboxPadSubscription(EventThread):
    """EventThread subclass with helpers that read cleaned-up axis values from a pad object."""

    # axis readings whose magnitude is at most this value are reported as 0
    AXIS_DEAD_ZONE = 0.1
    # NOTE(review): not used in the visible part of the class; presumably the
    # trigger level that counts as "pressed" - confirm
    TRIGGER_ACTIVATION = 0.5

    @classmethod
    def __getAdjustedAxis(cls, pad, axis):
        """Read one axis, limit it to [-1.0, 1.0] and zero it inside the dead zone."""
        bounded = clamp(pad.get_axis(axis), -1.0, 1.0)
        return roundBelowToZero(bounded, cls.AXIS_DEAD_ZONE)

    @classmethod
    def __getStickState(cls, pad, axisStart):
        """Return the adjusted values of axes axisStart and axisStart+1 as a 2-tuple."""
        firstAxis = cls.__getAdjustedAxis(pad, axisStart)
        secondAxis = cls.__getAdjustedAxis(pad, axisStart+1)
        return (firstAxis, secondAxis)
Ejemplo n.º 31
0
                pmid = l.strip().replace("<Id>","").replace("</Id>", "")
                # strip off part after first _!
                self.cache[doi]=pmid
                return pmid

def filenameToId(fname):
    """ Turn a file path into its integer document id.

    The directory part and the extension are dropped, every "PMC" substring is
    removed from what is left, and the remainder is converted with int()
    (so a non-numeric remainder raises ValueError).
    """
    stem, _extension = os.path.splitext(os.path.basename(fname))
    digits = stem.replace("PMC","")
    return int(digits)

# ==== SEQUENCE EXTRACTION =====

#  ... from directory

# Record with three fields: docId, location and nucleotideOccurrences.
DocumentSequences = namedtuple.namedtuple("DocumentSequences", "docId, location, nucleotideOccurrences")

def generateDocumentSequences(folder):
    """ An iterator. Parses a folder with .txt and .xml files, but prefers txt files. Yields DocumentSequences"""

    # little helper
    def findSubdirFiles(baseDir, extensions):
        """ Generator: traverse a baseDir and all subdirectories to find all files with certain extensions, extension is dot plus the extension, like ".xml"  """
        for root, dirs, files in os.walk(baseDir):
            for f in files:
                if os.path.splitext(f)[1] in extensions:
                    yield os.path.join(root, f)

    # get list of all basenames in this dir
    filenames = list(findSubdirFiles(folder, ['.xml','.txt']))
    baseNames = set([os.path.splitext(fname)[0] for fname in filenames])
Ejemplo n.º 32
0
    saved = {}
    def call(*args):
        try:
            return saved[args]
        except KeyError:
            res = func(*args)
            saved[args] = res
            return res
        except TypeError:
            # Unhashable argument
            return func(*args)
    call.func_name = func.func_name
    return call

# Create a named tuple type from a string of field names.
# NOTE(review): this namedtuple is called with the field names only, so it is
# not collections.namedtuple's (typename, field_names) signature.
MyTuple = namedtuple("one two three")
print MyTuple
# Instantiate a test named tuple and a dictionary holding the same data.
my_tuple = MyTuple(one=1, two=2, three=3)
my_dict = {"one":1, "two":2, "three":3}

# Disabled: wrap namedtuple in the memoizing decorator.
#namedtuple = memoize(namedtuple)
# Field names (k) and values (v) shared by the test functions.
k = ("one", "two", "three")
v = (1, 2, 3)

def create_nuple():
  """Instantiate the module-level MyTuple once with v; the result is discarded."""
  # Disabled: rebuilding the type inside the function.
  #MyTuple=namedtuple("MyTuple", "one two three")
  MyTuple(v)

def create_nuple2():
        return str(value)

    @staticmethod
    def from_drmaa(value):
        """Convert value to int (going by the name, a value as received from DRMAA)."""
        return int(value)

class SessionStringAttribute(object):
    """Read-only descriptor whose value is a string fetched through a DRMAA function."""

    def __init__(self, drmaa_function):
        """Remember the function that is handed to c() on every read."""
        self._f = drmaa_function

    def __get__(self, *args):
        """Call the stored function with a fresh _BUFLEN-sized buffer and return its content."""
        outBuffer = _ct.create_string_buffer(_BUFLEN)
        outSize = _ct.sizeof(outBuffer)
        c(self._f, outBuffer, outSize)
        return outBuffer.value

# Two-field version record; str() renders it as "<major>.<minor>".
Version = _nt.namedtuple("Version", "major minor")
Version.__str__ = lambda x: "%s.%s" % (x.major, x.minor)
# The docstring assignment below is disabled; its text is kept for reference.
#Version.__doc__ = """\
#An object representing the DRMAA version.
#
#major and minor attributes are int. For DRMAA 1.0, major == 1 and minor == 0.
#"""

class SessionVersionAttribute(object):
    """Read-only descriptor that returns the DRMAA version as a Version record."""

    def __get__(self, *args):
        """Query drmaa_version through c() and wrap the two numbers in a Version."""
        # output parameters, filled in by the call below (10 is only the initial value)
        majorOut = _ct.c_uint(10)
        minorOut = _ct.c_uint(10)
        majorRef = _ct.byref(majorOut)
        minorRef = _ct.byref(minorOut)
        c(drmaa_version, majorRef, minorRef)
        return Version(majorOut.value, minorOut.value)
Ejemplo n.º 34
0
    for id, idList in map.iteritems():
        bestElements = bestScoreElements(idList, scoreField)
        filteredList.extend(bestElements)
    return filteredList

def removeBigSets(predDict, limit):
    """ given a dict with string -> set, return a new dict without the entries where len(set) >= limit

    predDict is not modified; the kept sets are the same objects, not copies.

    >>> removeBigSets({"a": set([1]), "b": set([1, 2, 3])}, 3)
    {'a': set([1])}
    """
    result = {}
    # iterate over (key, value) pairs; iterating the dict itself yields only the keys
    for key, predSet in predDict.items():
        if len(predSet)<limit:
            result[key] = predSet
    return result


# return types for benchmark()
# BenchmarkResult: counts (TP, FN, FP), derived scores (Prec, Recall, F), the list of
#   errors (errList) and the number of objects (objCount); its typename is "BenchResultRec".
# ErrorDetails: one entry of errList - an id with its expected and predicted values.
BenchmarkResult = namedtuple.namedtuple("BenchResultRec", "TP, FN, FP, Prec, Recall, F, errList, objCount")
ErrorDetails    = namedtuple.namedtuple("ErrorDetails", "id, expected, predicted")

def benchmark(predDict, refDict):
    """ returns a class with attributes for TP, FN, FP and various other counts and information about prediction errors 
    >>> benchmark({"a" : set([1,2,3]), "b" : set([3,4,5])}, {"a":set([1]), "b":set([4])})
    BenchResultRec(TP=2, FN=0, FP=4, Prec=0.3333333333333333, Recall=1.0, F=0.5, errList=[ErrorDetails(id='a', expected=set([1]), predicted=set([1, 2, 3])), ErrorDetails(id='b', expected=set([4]), predicted=set([3, 4, 5]))], objCount=2)
    """
    OBJECTNAME="documents"

    TP, FN, FP = 0, 0, 0
    objCount = 0
    atLeastOneHit = 0

    errDetails = []
    completeMatch = 0
Ejemplo n.º 35
0
        data = [str(e) for e in data]
        file.write("\t".join(data) + "\n")


def removeUnivec(bestHits):
    """ Remove every hit whose sequence also has a hit with genomeId == 0.

    A seqId that has at least one hit with genomeId == 0 is treated as noise
    (presumably vector/univec contamination, going by the function name) and
    ALL hits of that seqId are dropped, not only the genomeId == 0 ones.

    bestHits: iterable of records with .seqId and .genomeId attributes.
    Returns a new list; bestHits itself is not modified.
    """
    noiseSeqIds = set(bh.seqId for bh in bestHits if bh.genomeId == 0)
    result = [bh for bh in bestHits if bh.seqId not in noiseSeqIds]
    # report how many hits survived the filter (the input size was logged before)
    logger.debug("%d non-univec hits" % (len(result)))
    if len(result) == 0:
        logger.debug("No hits left, skipping this document")
    return result


# Record type for a chained feature; seqIds and chainId are the fields added on
# top of the per-hit fields (genomeId, chrom, start, end, seqId, score).
# NOTE(review): the typename is "chainedFeature" (lower-case c), unlike the variable name.
ChainedFeature = namedtuple.namedtuple(
    "chainedFeature",
    "genomeId, chrom, start, end, seqId, score, seqIds, chainId")


def chainHitsGenes(hits):
    """ index by geneId, convert to "chainedFeatures" (the chain is the just
    the gene here) and set the seqIds attribute to all seqids that match a
    particular gene """

    # index by gene
    logger.debug("%d unchained gene hits" % len(hits))
    geneFeatures = {}
    for f in hits:
        gene = f.chrom.split("|")[1]
        geneFeatures.setdefault(gene, []).append(f)
Ejemplo n.º 36
0
from namedtuple import namedtuple

# Event record types. Each event is a namedtuple whose typename matches the
# variable it is bound to; events without payload are declared with no fields.
FocusChangeEvent = namedtuple("FocusChangeEvent", "window")
LoadingRulesEvent = namedtuple("LoadingRulesEvent", "state")
MicrophoneEvent = namedtuple("MicrophoneEvent", "state")
RuleActivateEvent = namedtuple("RuleActivateEvent", "rule")
RuleDeactivateEvent = namedtuple("RuleDeactivateEvent", "rule")
RuleMatchEvent = namedtuple("RuleMatchEvent", "hash phrase extras words")
RuleRegisterEvent = namedtuple("RuleRegisterEvent", "rule")
WindowListEvent = namedtuple("WindowListEvent", "windows")
WordEvent = namedtuple("WordEvent", "words")
WordListEvent = namedtuple("WordListEvent", "name words")
RecognitionStateEvent = namedtuple("RecognitionStateEvent", "state")
PedalsEvent = namedtuple("PedalsEvent", "pedalStates changed")
ExitEvent = namedtuple("ExitEvent", [])
RepeatRequestEvent = namedtuple("RepeatRequestEvent", [])

# emacs events
BufferListEvent = namedtuple("BufferListEvent", "query choices")
EmacsConnectedEvent = namedtuple("EmacsConnectedEvent", [])
EmacsSymbolEvent = namedtuple("EmacsSymbolEvent", "choices")
EmacsWordEvent = namedtuple("EmacsWordEvent", "choices")
MajorModeEvent = namedtuple("MajorModeEvent", "modeList")
NickEvent = namedtuple("NickEvent", "choices")
# the typename was "BufferListEvent" (copy-paste slip), which made this type's
# name and repr indistinguishable from the real BufferListEvent
ProjectFileListEvent = namedtuple("ProjectFileListEvent", "choices")
ProjectListEvent = namedtuple("ProjectListEvent", "choices")


class ConnectedEvent(object):
    """Marker event without payload; only its type carries information."""
    pass
Ejemplo n.º 37
0
        buf = ""
        for i in sorted(self.scratch):
            if not buf:
                start = i
            buf += self.scratch[i]
            if i + 1 not in self.scratch:
                data[start] = buf
                buf = ""
        return data

    def dump_scratch(self):
        """Print one line per get_scratch() entry: the start key in hex, ":" and the hex-encoded buffer."""
        # Python 2 only: print statement and the "hex" string codec
        for start, buf in self.get_scratch().items():
            print hex(start), ":", buf.encode("hex")


# Record describing one segment: its selector plus base, limit and flags.
Segment = namedtuple("Segment", "selector base limit flags")


def seg_base(e1, e2):
    """Assemble the base value from two descriptor words.

    The result combines e1 shifted right by 16, the low byte of e2 moved to
    bits 16-23, and bits 24-31 of e2 left in place.
    NOTE(review): e1/e2 are presumably the low/high 32-bit words of an x86
    segment descriptor - confirm against caller.
    """
    lowPart = e1 >> 16
    midPart = (e2 & 0xFF) << 16
    highPart = e2 & 0xFF000000
    return lowPart | midPart | highPart


def seg_limit(e1, e2):
    """Assemble the limit value from two descriptor words.

    The raw limit is the low 16 bits of e1 combined with bits 16-19 of e2.
    When the DESC_G_MASK bit is set in e2, the raw limit is shifted left by 12
    and the low 12 bits are filled with ones.
    """
    rawLimit = (e1 & 0xFFFF) | (e2 & 0x000F0000)
    if not (e2 & DESC_G_MASK):
        return rawLimit
    return (rawLimit << 12) | 0xFFF


def load_seg(mem, selector, GDT, LDT):
    selector &= 0xFFFF
Ejemplo n.º 38
0
Archivo: run.py Proyecto: dtmori/HWW
# -------------==============-------------==============-------------==============

log = msg.msglog( 'run', 'info' )

# -------------------------

from evhandler import EventHandler


def munch( str, delim=' ' ):
    """Split off the first delim-separated token.

    Returns (first token, rest of the string re-joined with delim); the rest
    is '' when str contains no delim.
    """
    pieces = str.split( delim )
    first = pieces[0]
    remainder = delim.join( pieces[1:] )
    return first, remainder

# -------------------------

# Histogram definition records: the 1D form has one title/bins pair,
# the 2D form has one title/bins pair per axis (x and y).
hdef1D_t = namedtuple.namedtuple( 'hdef1D_t', 'name expression title bins' )
hdef2D_t = namedtuple.namedtuple( 'hdef2D_t', 'name expression titlex binsx titley binsy' )

# -------------==============-------------==============-------------==============

# Module-level run settings; every value below starts out as an empty placeholder.
# NOTE(review): presumably filled in elsewhere before use - confirm.
sample_names          = []      # list of samples to process

systematics_files     = None    # .pkl format systematics files for making bands
do_systematics        = False   # flag to include systematics bands in the plots

location              = ''      # path to ntuples
output_path           = ''      # path to output folder for plots & tables

cut_name              = ''      # name of current cut being applied
cut_word              = ''      # expression for the current cut
Ejemplo n.º 39
0
from namedtuple import namedtuple

# Event record types. Each event is a namedtuple whose typename matches the
# variable it is bound to; events without payload are declared with no fields.
FocusChangeEvent = namedtuple("FocusChangeEvent", "window")
LoadingRulesEvent = namedtuple("LoadingRulesEvent", "state")
MicrophoneEvent = namedtuple("MicrophoneEvent", "state")
RuleActivateEvent = namedtuple("RuleActivateEvent", "rule")
RuleDeactivateEvent = namedtuple("RuleDeactivateEvent", "rule")
RuleMatchEvent = namedtuple("RuleMatchEvent", "hash phrase extras words")
RuleRegisterEvent = namedtuple("RuleRegisterEvent", "rule")
WindowListEvent = namedtuple("WindowListEvent", "windows")
WordEvent = namedtuple("WordEvent", "words")
WordListEvent = namedtuple("WordListEvent", "name words")
RecognitionStateEvent = namedtuple("RecognitionStateEvent", "state")
PedalsEvent = namedtuple("PedalsEvent", "pedalStates changed")
ExitEvent = namedtuple("ExitEvent", [])
RepeatRequestEvent = namedtuple("RepeatRequestEvent", [])

# emacs events
BufferListEvent = namedtuple("BufferListEvent", "query choices")
EmacsConnectedEvent = namedtuple("EmacsConnectedEvent", [])
EmacsSymbolEvent = namedtuple("EmacsSymbolEvent", "choices")
EmacsWordEvent = namedtuple("EmacsWordEvent", "choices")
MajorModeEvent = namedtuple("MajorModeEvent", "modeList")
NickEvent = namedtuple("NickEvent", "choices")
# the typename was "BufferListEvent" (copy-paste slip), which made this type's
# name and repr indistinguishable from the real BufferListEvent
ProjectFileListEvent = namedtuple("ProjectFileListEvent", "choices")
ProjectListEvent = namedtuple("ProjectListEvent", "choices")

# Marker events without payload; only the type of the instance carries information.
class ConnectedEvent(object): pass
class DisconnectedEvent(object): pass
class EventsDrainedEvent(object): pass
class RestartEvent(object): pass
150217.1635 hpl added logic to fix wrongly scaled sham session recordings
150309.1610 hpl merge in ability to write partial files
151005.1400 hpl some debugging changes
160304.0844 hpl try reading optional ot values as last "stream" value
160426.1330 hpl dummy in broken raw file events (no events header written in 440i)
160515.1130 hpl yet another trap on corrupt file decoding
"""

import struct, glob, string, os, os.path, sys, shutil
import mx.DateTime

import namedtuple
# Binary record layouts. Every namedtuple below is paired with the struct format
# string that follows it, one unpacked value per field. All formats start with
# "=": native byte order, standard sizes, no alignment padding.

# File header: 10 leading fields, 32 values F0..F31, then cboffset and
# maxblocksize (44 fields, matching the 44 values described by hgfmt).
HEADR = namedtuple.namedtuple('HEADR', [
    'filetype', 'formatcode', 'datecode', 'timecode', 'clientcode',
    'clientname', 'machineID', 'clockrate', 'numberofchannels', 'formatstring',
    'F0', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10', 'F11',
    'F12', 'F13', 'F14', 'F15', 'F16', 'F17', 'F18', 'F19', 'F20', 'F21',
    'F22', 'F23', 'F24', 'F25', 'F26', 'F27', 'F28', 'F29', 'F30', 'F31',
    'cboffset', 'maxblocksize'
])
hgfmt = '=4sHLL32s64sLHH16s32LLL'
# Disabled earlier control-block format, kept for reference.
#####cbfmt='=LLHHL8sHHL8s'
# Per-channel description (7 fields, 7 values in hchanfmt).
CHANFMT = namedtuple.namedtuple('CHANFMT', [
    'typecode', 'channel', 'subset', 'datatype', 'ID', 'scalefactor',
    'dboffset'
])
# NOTE(review): the previous comment said "use HBBLdL instead of HHLdL", which
# does not match the format actually used here (four B's) - confirm which is right.
hchanfmt = '=BBBBLdL'
# Data block header (5 fields, 5 values in hdbfmt).
DBFMT = namedtuple.namedtuple(
    'DBFMT', ['length', 'typecode', 'datacode', 'timestamp', 'nextoffset'])
hdbfmt = '=LBBLL'  # now use LBBLL instead of LHLL
hctl = '=LL'  # control block header
hcb = '=HHL8s'  # ctl block subhead