Esempio n. 1
0
def fromCache(cache,
              tracks=None,
              slices=None,
              groupby="slice"):
    '''Build a nested data tree from a flat cache.

    Cache keys are split on ``"/"``; the first component is treated as the
    track and the second as the slice.

    :param cache: mapping offering ``keys()`` and item access by
        ``"track/slice"`` key.
    :param tracks: comma-separated track names, or ``None`` to use every
        track found in the cache keys.
    :param slices: comma-separated slice names, or ``None`` to use every
        slice found in the cache keys.
    :param groupby: ``"slice"`` or ``"all"`` gives ``data[slice][track]``;
        ``"track"`` gives ``data[track][slice]``.  Any other value leaves
        the tree empty.
    :return: the populated ``DataTree``.

    A track/slice combination that is absent from *cache* propagates
    whatever error the cache lookup raises.
    '''

    data = DataTree()
    keys = [x.split("/") for x in cache.keys()]

    # identity test: ``None`` is the "use everything" marker
    if tracks is None:
        tracks = set(x[0] for x in keys)
    else:
        tracks = tracks.split(",")

    if slices is None:
        slices = set(x[1] for x in keys)
    else:
        slices = slices.split(",")

    def tokey(track, slice_name):
        return "/".join((track, slice_name))

    # ``slice_name`` rather than ``slice`` so the builtin is not shadowed
    if groupby in ("slice", "all"):
        for slice_name in slices:
            data[slice_name] = odict()
            for track in tracks:
                data[slice_name][track] = cache[tokey(track, slice_name)]
    elif groupby == "track":
        for track in tracks:
            data[track] = odict()
            for slice_name in slices:
                data[track][slice_name] = cache[tokey(track, slice_name)]
    return data
    def transform(self, data, path):
        '''Attach labels to the arrays in *data*.

        If ``self.labels`` is set, the array stored under that key supplies
        the labels and is removed from the set of value arrays; otherwise
        the labels are ``"1"``, ``"2"``, ... up to the longest array.

        :param data: mapping of name to array.
        :param path: not used by this method.
        :return: *data* unchanged if it is empty; a flat label-to-value
            mapping if *data* has exactly two entries; otherwise one
            label-to-value mapping per remaining key.
        :raises ValueError: if ``self.labels`` is set and no other array
            remains.
        '''
        debug( "%s: called" % str(self))

        if len(data) == 0: return data

        # a real list: an entry is removed below, which a key view does not allow
        keys = list(data.keys())

        if self.labels:
            labels = data[self.labels]
            keys.remove(self.labels)
            if len(keys) < 1:
                raise ValueError( "TransformerToLabels requires at least two arrays, got only 1, if tf-labels is set" )
        else:
            max_nkeys = max([len(x) for x in data.values()])
            labels = range(1, max_nkeys + 1)

        # a list, not a lazy map: the labels are zipped once per key below
        labels = [str(x) for x in labels]

        # NOTE(review): the test is on len(data), not len(keys); with two
        # arrays and no label array only the first one is kept - confirm
        # this is intended.
        if len(data) == 2:
            new_data = odict(zip(labels, data[keys[0]]))
        else:
            new_data = odict()
            for key in keys:
                new_data[key] = odict(zip(labels, data[key]))

        return new_data
    def transform(self, data, path ):
        '''Apply ``self.apply`` to every unordered pair of arrays in *data*.

        When ``self.paired`` is true the two arrays must have equal length,
        and positions where either value is ``None`` are dropped from both
        before ``self.apply`` is called.

        :param data: mapping of name to array; at least two entries.
        :param path: not used by this method.
        :return: nested mapping with ``result[x][y] == result[y][x]`` for
            every pair whose computation succeeded.
        :raises ValueError: if fewer than two arrays are given, or paired
            arrays differ in length.
        '''
        debug( "%s: called" % str(self))

        names = list(data.keys())
        if len(names) < 2:
            raise ValueError( "expected at least two arrays, got only %s." % str(names) )

        new_data = odict()
        for x in names:
            new_data[x] = odict()

        for x, y in itertools.combinations(names, 2):
            xvals, yvals = data[x], data[y]
            if self.paired:
                if len(xvals) != len(yvals):
                    raise ValueError("expected two arrays of the same length, %i != %i" % (len(xvals),
                                                                                            len(yvals)))
                # keep only positions where both observations are present
                take = [i for i in range(len(xvals))
                        if xvals[i] is not None and yvals[i] is not None]
                xvals = [xvals[i] for i in take]
                yvals = [yvals[i] for i in take]

            try:
                result = self.apply(xvals, yvals)
            except ValueError as msg:
                # a failed pair is reported and skipped, not fatal
                warn( "pairwise computation failed: %s" % msg)
                continue

            new_data[x][y] = result
            new_data[y][x] = result

        return new_data
Esempio n. 4
0
 def __call__(self, track, slice=None):
     """Return fixed example columns for the requested slice.

     ``"slice1"`` yields two columns, ``"slice2"`` three; any other slice
     yields ``None``.  *track* is not used.
     """
     if slice == "slice1":
         columns = (("column1", 10),
                    ("column2", 20))
     elif slice == "slice2":
         columns = (("column1", 20),
                    ("column2", 10),
                    ("column3", 5))
     else:
         return None
     return odict(columns)
Esempio n. 5
0
 def __call__(self, track):
     """Return the rows of the summed-coverage-per-feature query as an odict.

     NOTE(review): the ``tablename`` and ``track`` placeholders are not
     filled in here; presumably ``self.get`` interpolates them - confirm.
     """
     statement = """SELECT feature, SUM( percent_coverage) AS coverage FROM %(tablename)s 
                            WHERE contig = '%(track)s' GROUP BY feature """
     rows = self.get(statement)
     return odict(rows)
 def __call__(self, track, slice=None):
     """Plot 40 shuffled random integers against themselves on a new R device.

     :return: odict whose single ``"text"`` entry is an ``rpl`` placeholder
         carrying the number returned by ``getCurrentRDevice()``.
     """
     values = []
     for _ in range(40):
         values.append(random.randint(0, 20))
     random.shuffle(values)

     # open a device and draw on it
     R.x11()
     R.plot(values, values)

     placeholder = "#$rpl %i$#" % getCurrentRDevice()
     return odict((("text", placeholder),))
Esempio n. 7
0
File: SNP.py Progetto: siping/cgat
    def __call__(self, track, slice = None ):
        """Return the pileup columns of one SNP, keyed by column name.

        Rows are read from ``<track>_<self.suffix>`` where ``snp`` equals
        *slice*.  The last selected column (``read_bases``) is wrapped in
        double back-quotes.

        :return: odict mapping each column name to a tuple of its values;
            empty if the query returns no rows.
        """

        columns = ("snp",
                   "nreads",
                   "contig",
                   "position",
                   "reference_base",
                   "consensus_base",
                   "consensus_quality",
                   "snp_quality",
                   "rms_mapping_quality",
                   "coverage",
                   "read_bases" )

        # can't use - gives rest parsing errors
        #                   "base_qualities" )

        # ``fields`` and ``suffix`` are referenced by name in the statement
        fields = ",".join(columns)
        suffix = self.suffix
        statement = '''
        SELECT %(fields)s FROM %(track)s_%(suffix)s WHERE snp = '%(slice)s'
        ''' % locals()
        data = self.get(statement)

        # Replace only the final value with its quoted form.  Slicing with
        # [:-2] dropped the ``coverage`` value as well, leaving ten values
        # for eleven column names and shifting ``read_bases`` under the
        # ``coverage`` label.
        data = [ x[:-1] + ('``%s``' % x[-1],) for x in data ]

        return odict( zip( columns,
                           zip(*data) ) )
Esempio n. 8
0
    def __call__(self, track, slice=None):

        edir = EXPORTDIR

        toc_text = []
        link_text = []

        filenames = sorted([x.asFile() for x in TRACKS])

        for fn in filenames:
            if PE == "True":
                fn1 = fn + ".1"
                fn2 = fn + ".2"
                toc_text.append("* %(fn1)s_" % locals())
                toc_text.append("* %(fn2)s_" % locals())
                link_text.append(
                    ".. _%(fn1)s: %(edir)s/fastqc/%(fn1)s_fastqc/fastqc_report.html" % locals())
                link_text.append(
                    ".. _%(fn2)s: %(edir)s/fastqc/%(fn2)s_fastqc/fastqc_report.html" % locals())
            else:
                toc_text.append("* %(fn)s_" % locals())
                link_text.append(
                    ".. _%(fn)s: %(edir)s/fastqc/%(fn)s_fastqc/fastqc_report.html" % locals())

        toc_text = "\n".join(toc_text)
        link_text = "\n".join(link_text)

        rst_text = '''
%(toc_text)s

%(link_text)s
''' % locals()

        return odict((("text", rst_text),))
Esempio n. 9
0
 def __call__(self, track, slice=None):
     """Return the ``<slice>_fpkm`` values selected from ``<track>_levels``.

     Returns ``None`` when that table has no ``<slice>_FPKM`` column.

     NOTE(review): the statement is handed over with its placeholders
     unfilled; presumably ``self.getValues`` substitutes them (confirm),
     which is why the local name ``table`` is kept.
     """
     table = track + "_levels"
     wanted = "%s_FPKM" % slice
     if wanted not in self.getColumns(table):
         return None
     statement = '''SELECT %(slice)s_fpkm FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f'''
     return odict((("fpkm", self.getValues(statement)),))
 def __call__(self, track, slice = None ):
     """Count intervals per feature class for one track.

     Five independent queries are run.  A query that raises is reported
     as the string ``"0"`` instead of aborting the whole result.

     :return: odict mapping the five class labels to their query result,
         in a fixed order.
     """
     # referenced by name in the statements below
     ANNOTATIONS_NAME = P['annotations_name']

     queries = (
         ("Protein-coding TSS",
          """SELECT count(distinct gene_id) as intervals
                                     FROM %(track)s_replicated_%(ANNOTATIONS_NAME)s_overlap
                                     where (genes_nover>0 OR downstream_flank_nover>0 OR upstream_flank_nover>0)"""),
         ("Non-coding TSS",
          """SELECT count(distinct gene_id) as intervals
                                    FROM %(track)s_replicated_%(ANNOTATIONS_NAME)s_noncoding_tss_distance
                                    where closest_dist < 1000"""),
         ("H3K4Me1 Enhancer",
          """SELECT distinct count(distinct interval_id) as intervals, "enhancer" as feature_class 
                                    FROM %(track)s_replicated_h3k4me1_intervals"""),
         ("RNAseq transcript",
          """SELECT count(distinct gene_id) as intervals
                                     FROM %(track)s_replicated_rnaseq_tss_distance
                                     where closest_dist < 1000"""),
         ("lincRNA TSS",
          """SELECT count(distinct gene_id) as intervals
                                     FROM %(track)s_replicated_lncrna_tss_distance
                                     where closest_dist < 1000"""),
     )

     result = odict()
     for label, statement in queries:
         try:
             result[label] = self.getValue(statement % locals())
         except Exception:
             # Best effort, as before.  ``Exception`` rather than a bare
             # ``except`` so KeyboardInterrupt and SystemExit still propagate.
             result[label] = "0"
     return result
Esempio n. 11
0
    def __call__(self, track, slice = None ):
        edir = EXPORTDIR
        geneset = track
        method = self.method

        rst_text = []

        for level in self.levels:
            for fn in (
                "%(edir)s/%(method)s/%(geneset)s_%(method)s_%(level)s_heatmap.png",
                "%(edir)s/%(method)s/%(geneset)s_%(method)s_%(level)s_scvplot.png",
                "%(edir)s/%(method)s/%(geneset)s_%(method)s_%(level)s_pvalue_vs_length.png" ):
                f = fn % locals()
                if not os.path.exists(f): continue
                rst_text.append( ".. figure:: %(f)s" % locals() )
                
        if rst_text:
            rst_text = '''
%(geneset)s
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

''' % locals() + "\n\n".join( rst_text )
        else:
            rst_text = ""

        return odict( (("text", rst_text),) )
Esempio n. 12
0
    def transform(self, data, path):
        '''Pair up one field of every two entries in *data*.

        For each entry the first name in ``self.fields`` that it contains
        is selected.  That same field is then read from every later entry,
        and both arrays are stored under the label ``"<n1> x <n2>"``.

        :param data: mapping of name to a mapping of field to array.
        :param path: not used by this method.
        :return: nested mapping ``{"n1 x n2": {n1: array1, n2: array2}}``.
        :raises KeyError: if an entry has none of ``self.fields``, or a
            later entry lacks the field chosen for the earlier one.
        :raises ValueError: if the two arrays differ in length.
        '''

        debug( "%s: called" % str(self))

        # a real list so it can be indexed and sliced
        vals = list(data.keys())
        new_data = odict()

        for x1, n1 in enumerate(vals[:-1]):
            # find the first field that fits
            for field in self.fields:
                if field in data[n1]:
                    d1 = data[n1][field]
                    break
            else:
                raise KeyError("could not find any match from '%s' in '%s'" % (str(data[n1].keys()), str(self.fields )))

            for n2 in vals[x1 + 1:]:
                try:
                    d2 = data[n2][field]
                except KeyError:
                    # both placeholders now receive a value
                    raise KeyError("no field %s in '%s'" % (field, str(list(data[n2].keys()))))

                ## check if array?
                if len(d1) != len(d2):
                    raise ValueError("length of elements not equal: %i != %i" % (len(d1), len(d2)))

                label = "%s x %s" % (n1, n2)
                DataTree.setLeaf( new_data, (label, n1), d1 )
                DataTree.setLeaf( new_data, (label, n2), d2 )

        return new_data
Esempio n. 13
0
    def transform(self, data, path):
        '''Regroup *data* by the values found in ``self.field``.

        Every distinct value of ``self.field`` becomes a key of the result.
        Its entry holds the other fields as seen at the value's first
        occurrence, plus a ``"group"`` string to which ``",<name>"`` is
        appended for every top-level entry containing the value.

        :param data: mapping of name to a mapping of field to array.
        :param path: not used by this method.
        '''
        debug( "%s: called" % str(self))

        grouped = odict()

        for name in data.keys():
            branch = data[name]
            extras = [key for key in branch.keys() if key != self.field]
            for idx, value in enumerate(branch[self.field]):
                if value not in grouped:
                    grouped[value] = odict()
                entry = grouped[value]
                if "group" not in entry:
                    # first sighting: copy the accompanying fields
                    for key in extras:
                        entry[key] = branch[key][idx]
                    entry["group"] = ""
                entry["group"] += ",%s" % name

        return grouped
Esempio n. 14
0
def asMatrix(rownames, colnames, data):
    """Count (row, column) index pairs and return the counts as nested odicts.

    :param rownames: sequence of row labels; its length sets the row count.
    :param colnames: sequence of column labels; its length sets the column
        count.
    :param data: iterable of ``(row_index, column_index)`` pairs.  Pairs
        that raise ``IndexError`` on the matrix are ignored.
    :return: ``odict`` of ``str(rowname)`` to an ``odict`` of
        ``str(colname)`` to count.
    """
    counts = numpy.zeros((len(rownames), len(colnames)))
    for row_idx, col_idx in data:
        try:
            counts[row_idx, col_idx] += 1
        except IndexError:
            continue

    table = odict()
    for row_idx, rowname in enumerate(rownames):
        cells = [(str(colname), counts[row_idx, col_idx])
                 for col_idx, colname in enumerate(colnames)]
        table[str(rowname)] = odict(cells)

    return table
Esempio n. 15
0
 def __call__(self, track, slice=None):
     """Return ``<slice>_fpkm`` values divided by the column maximum.

     Returns ``None`` when ``<track>_levels`` has no ``<slice>_FPKM``
     column.

     NOTE(review): both statements are passed on with unfilled
     placeholders; presumably the query helpers substitute them (confirm),
     which is why ``table`` and ``max_fpkm`` keep their names.
     """
     table = track + "_levels"
     if "%s_FPKM" % slice not in self.getColumns(table):
         return None

     max_statement = '''SELECT max(%(slice)s_fpkm) FROM %(table)s'''
     max_fpkm = float(self.getValue(max_statement))

     statement = '''SELECT CAST( %(slice)s_fpkm AS FLOAT) / %(max_fpkm)f FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f'''
     values = self.getValues(statement)
     return odict((("percent of max(fpkm)", values),))
Esempio n. 16
0
 def __call__(self, track, slice=None):
     """Return the average FPKM per transfrag for one cuffcompare code.

     The query joins ``<track>_cuffcompare_tracking`` with
     ``<track>_cuffcompare_transcripts`` and keeps rows whose ``code``
     equals *slice*.
     """
     statement = """SELECT avg(FPKM)
                                   FROM %(track)s_cuffcompare_tracking AS t,
                                        %(track)s_cuffcompare_transcripts AS a
                                   WHERE code = '%(slice)s' AND 
                                   a.transfrag_id = t.transfrag_id
                                   GROUP BY a.transfrag_id""" % locals()
     fpkm = self.getValues(statement)
     return odict((("fpkm", fpkm),))
Esempio n. 17
0
 def __call__(self, track, slice=None):
     """Return the ``<slice>_fpkm`` column of ``<track>_levels`` as ``"fpkm"``.

     ``None`` is returned if the table lacks a ``<slice>_FPKM`` column.

     NOTE(review): placeholders in the statement are left for
     ``self.getValues`` to fill in (presumably - confirm); ``table`` must
     therefore keep its name.
     """
     table = track + "_levels"
     available = self.getColumns(table)
     if ("%s_FPKM" % slice) in available:
         statement = '''SELECT %(slice)s_fpkm FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f'''
         return odict((("fpkm", self.getValues(statement)),))
     return None
Esempio n. 18
0
    def __call__(self, track, slice=None):
        """Return a line through the origin whose slope depends on *track*.

        The slope is ``3 - int(track[-1])``, so the last character of
        *track* must be a digit.

        :return: odict with ``"x"`` (0..49) and ``"y"`` (``x * slope``).
        """
        scale = 3 - int(track[-1])
        xvalues = range(0, 50)
        yvalues = [x * scale for x in range(0, 50)]
        return odict((("x", xvalues),
                      ("y", yvalues)))
Esempio n. 19
0
 def __call__(self, track, slice=None):
     """Collect the mean FPKM of each transfrag whose code equals *slice*.

     :return: odict holding the values under the ``"fpkm"`` key.
     """
     # ``track`` and ``slice`` are substituted by name
     query = """SELECT avg(FPKM)
                                   FROM %(track)s_cuffcompare_tracking AS t,
                                        %(track)s_cuffcompare_transcripts AS a
                                   WHERE code = '%(slice)s' AND 
                                   a.transfrag_id = t.transfrag_id
                                   GROUP BY a.transfrag_id"""
     result = odict()
     result["fpkm"] = self.getValues(query % locals())
     return result
Esempio n. 20
0
    def __call__(self, track, slice=None):
        """Load ``ortholog_pairs_with_feature.matrix2`` from the working directory.

        :return: odict with ``"matrix"``, ``"rows"`` and ``"columns"``, or
            ``None`` if the file does not exist.
        """
        fn = "ortholog_pairs_with_feature.matrix2"
        if not os.path.exists(fn):
            return

        x = IOTools.openFile(fn)
        try:
            matrix, rownames, colnames = IOTools.readMatrix(x)
        finally:
            # Release the handle instead of leaving it to the garbage
            # collector.  NOTE(review): assumes readMatrix has consumed the
            # file by the time it returns - confirm.
            x.close()
        return odict((("matrix", matrix), ("rows", rownames), ("columns", colnames)))
Esempio n. 21
0
def asMatrix(rownames, colnames, data):
    """Tally index pairs into a labelled two-level mapping.

    A zero matrix of shape ``(len(rownames), len(colnames))`` is
    incremented once per ``(x, y)`` pair in *data*; pairs that raise
    ``IndexError`` are dropped.

    :return: odict keyed by ``str(rowname)``, each value an odict keyed by
        ``str(colname)`` holding the tally.
    """
    shape = (len(rownames), len(colnames))
    tally = numpy.zeros(shape)

    for pair in data:
        x, y = pair
        try:
            tally[x, y] += 1
        except IndexError:
            # out-of-range pairs are skipped
            pass

    results = odict()
    for i in range(shape[0]):
        row = odict()
        for j in range(shape[1]):
            row[str(colnames[j])] = tally[i, j]
        results[str(rownames[i])] = row
    return results
Esempio n. 22
0
    def __call__(self, track, slice=None):
        """Plot 40 shuffled random integers on a new matplotlib figure.

        :return: odict whose ``"text"`` entry is an ``mpl`` placeholder
            carrying the figure number.
        """
        values = []
        for _ in range(40):
            values.append(random.randint(0, 20))
        random.shuffle(values)

        # draw on a fresh figure
        fig = plt.figure()
        plt.plot(values)

        placeholder = "#$mpl %i$#" % fig.number
        return odict((("text", placeholder),))
 def __call__(self, track, slice=None):
     """Return the ``overlapped_genes_gat_results`` table column by column.

     The six selected columns are transposed and stored under fixed
     display headers.  *track* and *slice* are not used.
     """
     headers = ("Dataset1", "Dataset2", "Expected overlap",
                "Observed overlap", "Fold Enrichment", "P-value")
     rows = self.get(
         "SELECT track, annotation, round(expected,0) as expected, observed, round(fold,1) as fold, pvalue FROM overlapped_genes_gat_results "
     )
     # transpose rows into columns
     columns = zip(*rows)
     return odict(zip(headers, columns))
Esempio n. 24
0
def setLeaf(work, path, data):
    '''Store *data* at the nested location *path* inside *work*.

    All but the last element of *path* are followed as keys; a level that
    raises ``KeyError`` is created as an empty ``odict``.  The last element
    is the key under which *data* is stored.  *work* is modified in place
    and nothing is returned.
    '''
    parents, leaf = path[:-1], path[-1]
    node = work
    for key in parents:
        try:
            child = node[key]
        except KeyError:
            node[key] = odict()
            child = node[key]
        node = child
    node[leaf] = data
Esempio n. 25
0
    def __call__(self, track, slice=None):
        """Read the matrix stored in ``ortholog_pairs_with_feature.matrix2``.

        :return: ``None`` if the file is missing from the working
            directory, else an odict with ``'matrix'``, ``'rows'`` and
            ``'columns'``.
        """
        fn = "ortholog_pairs_with_feature.matrix2"
        if not os.path.exists(fn):
            return

        x = IOTools.openFile(fn)
        try:
            matrix, rownames, colnames = IOTools.readMatrix(x)
        finally:
            # Close the handle explicitly; the file was previously left open.
            # NOTE(review): assumes readMatrix reads the file completely
            # before returning - confirm.
            x.close()
        return odict(
            (('matrix', matrix), ('rows', rownames), ('columns', colnames)))
Esempio n. 26
0
 def __call__(self, track, slice=None):
     """Return the relative FPKM error of one slice.

     The value selected is ``(conf_hi - conf_lo) / fpkm / 2``.  ``None``
     is returned when ``<track>_levels`` has no ``<slice>_FPKM`` column.

     NOTE(review): the statement keeps its placeholders; presumably
     ``self.getValues`` fills them in (confirm), so ``table`` keeps its
     name.
     """
     table = track + "_levels"
     if "%s_FPKM" % slice not in self.getColumns(table):
         return None
     # half the confidence interval width, relative to the estimate
     statement = '''SELECT (%(slice)s_conf_hi - %(slice)s_conf_lo ) / %(slice)s_fpkm / 2
                    FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f'''
     return odict((("relative_error", self.getValues(statement)),))
Esempio n. 27
0
File: SNP.py Progetto: siping/cgat
 def __call__(self, track, slice=None):
     """Return the ``snps_of_interest`` table column by column.

     *track* and *slice* are not used.

     :return: odict mapping each of the six column names to a tuple of its
         values.
     """
     columns = ("snp", "contig", "pos", "reference", "alleles", "url")
     rows = self.get( """
     SELECT snp, contig, pos, reference, alleles, url
     FROM snps_of_interest
     """)
     # transpose rows into per-column tuples
     by_column = zip(*rows)
     return odict(zip(columns, by_column))
Esempio n. 28
0
 def __call__(self, track, slice=None):
     """Return one noisy 0..19 ramp per column in ``self.mColumns``.

     Gaussian noise with sigma 0.2 is added for ``"slice1"`` and 0.5 for
     ``"slice2"``.  Any other slice produces no data, so the result is
     empty.
     """
     if slice == "slice1":
         sigma = 0.2
     elif slice == "slice2":
         sigma = 0.5
     else:
         sigma = None

     series = []
     if sigma is not None:
         for _ in range(len(self.mColumns)):
             series.append([y + random.gauss(0, sigma) for y in range(20)])
     return odict(zip(self.mColumns, series))
Esempio n. 29
0
    def __call__(self, track, slice = None ):
        rst_text = '''
This is a preface

.. figure:: %s

Some more text for the figure\n''' % track

        return odict( (("rst", rst_text),) )
Esempio n. 30
0
    def __call__(self, track, slice=None):
        """Return picard read-pair statistics for every track.

        The first column of each row names the track; the remaining
        columns are stored as ``total``, ``mapped``, ``reverse`` and
        ``duplicates``.  *track* and *slice* are not used.
        """
        statement = '''SELECT pas.TRACK, pas.total_reads/2 as Total_read_pairs, pas.reads_aligned_in_pairs/2 as Aligned_pairs, 
                       ROUND((pas.reads_aligned_in_pairs/2)*(1-strand_balance),0) as reverse, pds.read_pair_duplicates as duplicate_pairs
                       FROM picard_stats_alignment_summary_metrics pas, picard_duplicates_duplicate_metrics pds
                       WHERE pas.track=pds.track and pas.category='PAIR';'''
        rows = self.get(statement)
        labels = ("total", "mapped", "reverse", "duplicates")

        result = odict()

        # first pass: one empty entry per track name
        for row in rows:
            result[row[0]] = odict()

        # second pass: label the remaining columns
        for row in rows:
            entry = result[row[0]]
            for label, value in zip(labels, row[1:]):
                entry[label] = value

        return result
Esempio n. 31
0
 def __call__(self, track, slice=None):
     """Return FPKM values of one slice as a fraction of the column maximum.

     ``None`` is returned if ``<track>_levels`` lacks a ``<slice>_FPKM``
     column.

     NOTE(review): placeholders are left in both statements; presumably
     the query helpers substitute them (confirm).  ``table`` and
     ``max_fpkm`` are referenced by name and must not be renamed.
     """
     table = track + "_levels"
     columns = self.getColumns(table)
     if ("%s_FPKM" % slice) in columns:
         max_fpkm = float(
             self.getValue( '''SELECT max(%(slice)s_fpkm) FROM %(table)s'''))
         statement = '''SELECT CAST( %(slice)s_fpkm AS FLOAT) / %(max_fpkm)f FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f'''
         return odict((("percent of max(fpkm)", self.getValues(statement)),))
     return None
Esempio n. 32
0
 def __call__(self, track, slice=None):
     """Return ``(conf_hi - conf_lo) / fpkm / 2`` for one slice.

     ``None`` is returned if ``<track>_levels`` has no ``<slice>_FPKM``
     column.

     NOTE(review): the placeholders are not filled in here; presumably
     ``self.getValues`` does so (confirm).  ``table`` is referenced by
     name.
     """
     table = track + "_levels"
     columns = self.getColumns(table)
     if ("%s_FPKM" % slice) in columns:
         # halving the interval width gives the relative error
         statement = '''SELECT (%(slice)s_conf_hi - %(slice)s_conf_lo ) / %(slice)s_fpkm / 2
                    FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f'''
         errors = self.getValues(statement)
         return odict((("relative_error", errors),))
     return None
Esempio n. 33
0
    def __call__(self, track, slice=None):
        """Tabulate read-pair counts per track from the picard tables.

        :return: odict keyed by the first result column, each value an
            odict with ``total``, ``mapped``, ``reverse`` and
            ``duplicates`` taken from the remaining columns.
        """
        statement = '''SELECT pas.TRACK, pas.total_reads/2 as Total_read_pairs, pas.reads_aligned_in_pairs/2 as Aligned_pairs, 
                       ROUND((pas.reads_aligned_in_pairs/2)*(1-strand_balance),0) as reverse, pds.read_pair_duplicates as duplicate_pairs
                       FROM picard_stats_alignment_summary_metrics pas, picard_duplicates_duplicate_metrics pds
                       WHERE pas.track=pds.track and pas.category='PAIR';'''
        records = self.get(statement)
        slice_names = ("total", "mapped", "reverse", "duplicates")
        result = odict()

        # tracks: the first column
        for record in records:
            name = record[0]
            result[name] = odict()

        # slices: the other columns
        for record in records:
            name, values = record[0], record[1:]
            for slice_name, value in zip(slice_names, values):
                result[name][slice_name] = value

        return result
Esempio n. 34
0
 def __call__(self, track, slice=None):
     """Return fixed example columns carrying data, error and label values.

     ``"slice1"`` yields two columns and ``"slice2"`` three (two of them
     without a label); any other slice yields ``None``.
     """
     if slice == "slice1":
         columns = (
             ("column1", {'data': 20, 'error': 5, 'label': '**'}),
             ("column2", {'data': 10, 'error': 2, 'label': '*'}),
         )
     elif slice == "slice2":
         columns = (
             ("column1", {'data': 20, 'error': 5, 'label': '***'}),
             ("column2", {'data': 10, 'error': 1}),
             ("column3", {'data': 30, 'error': 4}),
         )
     else:
         return None
     return odict(columns)
    def __call__(self, track, slice = None ):
        """Return the first row of the configured query, keyed by ``self.mColumns``.

        The statement is ``<mSelect> FROM <track>_<mTable> WHERE <mWhere>``.
        For any slice other than ``None`` or ``"all"`` the condition
        ``AND is_<slice>`` is appended.
        """

        # looked up by name when the statement is formatted
        where = self.mWhere
        select = self.mSelect
        table = self.mTable

        statement = "%(select)s FROM %(track)s_%(table)s WHERE %(where)s"
        if slice is not None and slice != "all":
            # The placeholder was written ``is_%slices``, which is not a
            # valid named placeholder and cannot be formatted with a mapping.
            statement += " AND is_%(slice)s"

        data = self.getFirstRow( statement % locals() )

        return odict( zip(self.mColumns, data) )
Esempio n. 36
0
    def __call__(self, track, slice = None ):
        """Return a histogram of ``self.mColumn`` with unit-width bins.

        Values come from ``<track>_<mTable>``.  For a slice other than
        ``"all"`` the table is joined to ``<track>_<mAnnotations>`` on
        ``gene_id`` and restricted to ``is_<slice>``.

        :return: odict mapping ``str(left bin edge)`` to count; empty if
            the query returns no values.
        """

        # looked up by name through ``% locals()``
        annotations = self.mAnnotations
        table = self.mTable
        column, where = self.mColumn, self.mWhere
        if not slice or slice == "all":
            data = self.getValues( """SELECT %(column)s FROM %(track)s_%(table)s AS d WHERE %(where)s""" % locals() )
        else:
            data = self.getValues( """SELECT %(column)s FROM %(track)s_%(table)s AS d, %(track)s_%(annotations)s as a 
                                      WHERE d.gene_id = a.gene_id AND a.is_%(slice)s AND %(where)s""" % locals() )

        # max() of an empty sequence raises ValueError; report "no data"
        if len(data) == 0:
            return odict()

        hist, bins = numpy.histogram( data, bins=numpy.arange(0, max(data) + 1, 1) )
        return odict( zip( map(str, bins[:-1]), hist) )
Esempio n. 37
0
    def __call__(self, track, slice = None ):

        blocks = ResultBlocks()

        block = '''
.. figure:: %(image)s
   :height: 300 
'''
        for image in glob.glob( os.path.join( IMAGEDIR, "*.png" )):
            blocks.append( ResultBlock( text = block % locals(),
                                        title = "image" ) )

        return odict( (("rst", "\n".join( Utils.layoutBlocks( blocks, layout = "columns-2"))),))
Esempio n. 38
0
    def __call__(self, track, slice=None):
        """Count significant tests in two comparisons and those they share.

        *track* is unpacked into two pair names.  Rows are compared by
        their string form.

        NOTE(review): the statements are passed on with unfilled
        placeholders; presumably ``self.get`` substitutes them (confirm),
        so ``pair1`` and ``pair2`` keep their names.

        :return: odict with a count for each pair name plus ``"shared"``.
        """
        pair1, pair2 = track

        rows1 = self.get('''SELECT test_id, treatment_name, control_name FROM %(slice)s_%(pair1)s_gene_diff WHERE significant''')
        rows2 = self.get('''SELECT test_id, treatment_name, control_name FROM %(slice)s_%(pair2)s_gene_diff WHERE significant''')

        first = set(map(str, rows1))
        second = set(map(str, rows2))
        shared = first.intersection(second)

        return odict(((pair1, len(first)),
                      (pair2, len(second)),
                      ("shared", len(shared))))
Esempio n. 39
0
 def __call__(self, track, slice = None ):
     """Read ``ortholog_pairs_with_feature.matrix`` into an odict.

     Each line is split on whitespace: the first token is the key and the
     remaining tokens (as strings) are the value.  Blank lines are
     skipped.

     :return: the odict, or ``None`` if the file is missing from the
         working directory.
     """
     fn = "ortholog_pairs_with_feature.matrix"
     if not os.path.exists( fn ):
         return

     data = odict()
     # ``with`` closes the file even if a line fails to parse
     with open( fn ) as infile:
         for line in infile:
             temp = line.split()
             if not temp:
                 # a blank line has no name token
                 continue
             data[temp[0]] = temp[1:]
     return data
Esempio n. 40
0
    def __call__(self, track, slice=None):
        """Return the first row of the configured query, keyed by ``self.mColumns``.

        Without a slice (``None`` or ``"all"``) the statement is
        ``<mSelect> FROM <track>_<mTable> WHERE <mWhere>``; otherwise
        ``AND is_<slice>`` is appended to the condition.
        """

        # looked up by name through ``% locals()``
        where = self.mWhere
        select = self.mSelect
        table = self.mTable

        if slice == "all" or slice is None:
            data = self.getFirstRow(
                "%(select)s FROM %(track)s_%(table)s WHERE %(where)s" % locals())
        else:
            # ``is_%(slice)s`` replaces the malformed ``is_%slices``, which
            # is not a named placeholder and fails when formatted with a
            # mapping.
            data = self.getFirstRow(
                "%(select)s FROM %(track)s_%(table)s WHERE %(where)s AND is_%(slice)s" % locals())

        return odict(zip(self.mColumns, data))
Esempio n. 41
0
    def __call__(self, track, slice=None):
        """Count distinct genes per feature class in ``<track>_merged_ensembl_gene_overlap``.

        Each gene is assigned the first matching class among TSS,
        Upstream, Gene, Downstream and Intergenic.

        NOTE(review): the counts are paired with a fixed, alphabetically
        ordered label list; if a class has no genes the query returns
        fewer rows and the labels would shift - confirm this cannot happen.
        """
        labels = ("Downstream","Gene","Intergenic","TSS","Upstream")
        statement = """SELECT count(distinct gene_id) as intervals FROM (
                               SELECT gene_id,
                               CASE WHEN  tss_gene_extended_pover1 > 0  THEN 'TSS'
                               WHEN upstream_flank_pover1 >0 THEN 'Upstream'
                               WHEN genes_pover1 > 0 THEN 'Gene'
                               WHEN downstream_flank_pover1 >0 THEN 'Downstream'
                               ELSE 'Intergenic'
                               END AS feature_class
                               FROM %(track)s_merged_ensembl_gene_overlap)
                               group by feature_class
                               order by feature_class asc""" % locals()
        counts = self.getValues(statement)
        return odict(zip(labels, counts))
Esempio n. 42
0
    def __call__(self, track, slice=None):
        """Return length-weighted mean GC, CpG obs/exp and CpG density.

        The query reads ``annotations.genome`` and excludes the ``total``
        row, ids starting with chrX, chrY, chrW, chrZ or chrM, and ids
        containing ``random``.  *track* and *slice* are not used.
        """
        statement = """SELECT round(sum(length*pGC)/sum(length),3) as mean_GC,
                                    round(sum(length*CpG_ObsExp)/sum(length),3) as mean_CpG_ObsExp,
                                    round(sum(length*pCpG)/sum(length),3) as mean_CpG
                                    FROM annotations.genome
                                    WHERE id <> 'total'
                                    AND id not like 'chrX%%'
                                    AND id not like 'chrY%%'
                                    AND id not like 'chrW%%'
                                    AND id not like 'chrZ%%'
                                    AND id not like 'chrM%%'
                                    AND id not like '%%random%%' """
        row = self.getFirstRow(statement)
        labels = ["GC content", "CpG Obs/Exp", "CpG density"]
        return odict(zip(labels, row))
Esempio n. 43
0
 def __call__(self, track):
     """Return total and mapped read counts plus sense/antisense coverage sums.

     ``total`` and ``mapped`` come from ``bam_stats``.  The remaining
     entries are whatever ``self.getRow`` returns for the coverage
     statement.

     NOTE(review): none of the statements is formatted here; presumably
     the query helpers fill in the placeholders (confirm).  ``mapped`` is
     referenced by name in the last statement and must keep its name.
     """
     total = self.getValue(
         '''SELECT reads_total FROM bam_stats WHERE track = '%(track)s' ''' )
     mapped = self.getValue(
         '''SELECT reads_mapped FROM bam_stats WHERE track = '%(track)s' ''' )

     result = odict()
     result["total"] = total
     result["mapped"] = mapped

     statement = '''SELECT SUM(sense_nreads) + SUM(antisense_nreads) AS anysense,
                           CAST((SUM(sense_nreads) + SUM(antisense_nreads)) AS FLOAT) / %(mapped)i AS anysense_percent,
                           SUM(antisense_nreads) AS antisense,
                           CAST(SUM(antisense_nreads) AS FLOAT) / %(mapped)i AS antisense_percent,
                           SUM(sense_nreads) AS sense,
                           CAST(SUM(sense_nreads) AS FLOAT) / %(mapped)i AS sense_percent,
                           CAST(SUM(antisense_nreads) AS FLOAT)/ (SUM(antisense_nreads) + SUM(sense_nreads) ) AS ratio
                           FROM %(track)s_%(section)s_coverage '''
     result.update(self.getRow(statement))
     return result
Esempio n. 44
0
    def __call__(self, track, slice=None):
        """Count distinct genes per feature class in ``<track><mTable>``.

        The TSS class is decided by the ``self.mWhere`` expression; the
        others by the gene, upstream and downstream overlap columns.

        NOTE(review): the counts are paired with a fixed, alphabetically
        ordered label list; a class without genes would shift the labels -
        confirm this cannot happen.
        """
        # looked up by name through ``% locals()``
        table = self.mTable
        where = self.mWhere

        labels = ("Downstream", "Gene", "Intergenic", "TSS", "Upstream")
        statement = """ SELECT count(distinct gene_id) as intervals FROM (
                                   SELECT gene_id,
                                   CASE WHEN  %(where)s > 0  THEN 'TSS'
                                   WHEN genes_pover1 > 0 THEN 'Gene'
                                   WHEN upstream_flank_pover1 >0 THEN 'Upstream'
                                   WHEN downstream_flank_pover1 >0 THEN 'Downstream'
                                   ELSE 'Intergenic'
                                   END AS feature_class
                                   FROM %(track)s%(table)s)
                                   group by feature_class
                                   order by feature_class asc""" % locals()
        counts = self.getValues(statement)
        return odict(zip(labels, counts))
Esempio n. 45
0
    def __call__(self, track, slice=None):
        """Return the column sums of the ``is_*`` flags in ``cgi_annotations``.

        *track* and *slice* are not used.

        :return: odict mapping the eight annotation class names to their
            sums.
        """
        statement = """SELECT 
                                sum(is_cds) AS cds, 
                                sum(is_utr) AS utr, 
                                sum(is_upstream) AS upstream, 
                                sum(is_downstream) AS downstream,
                                sum(is_intronic) AS intronic, 
                                sum(is_intergenic) AS intergenic, 
                                sum(is_flank) AS flank, 
                                sum(is_ambiguous) AS ambiguous 
                                FROM cgi_annotations"""
        row = self.getFirstRow(statement)
        labels = ["cds", "utr", "upstream", "downstream",
                  "intronic", "intergenic", "flank", "ambiguous"]
        return odict(zip(labels, row))
 def __call__(self, track, slice=None):
     """Count intervals per feature class for one track.

     Five independent queries are run in a fixed order.  If building or
     running a query raises, that class is reported as the string ``"0"``.

     :return: odict mapping each class label to its query result.
     """
     # referenced by name in the statements below
     ANNOTATIONS_NAME = P['annotations_name']

     labelled_statements = (
         ("Protein-coding TSS",
          """SELECT count(distinct gene_id) as intervals
                                     FROM %(track)s_replicated_%(ANNOTATIONS_NAME)s_overlap
                                     where (genes_nover>0 OR downstream_flank_nover>0 OR upstream_flank_nover>0)"""),
         ("Non-coding TSS",
          """SELECT count(distinct gene_id) as intervals
                                    FROM %(track)s_replicated_%(ANNOTATIONS_NAME)s_noncoding_tss_distance
                                    where closest_dist < 1000"""),
         ("H3K4Me1 Enhancer",
          """SELECT distinct count(distinct interval_id) as intervals, "enhancer" as feature_class 
                                    FROM %(track)s_replicated_h3k4me1_intervals"""),
         ("RNAseq transcript",
          """SELECT count(distinct gene_id) as intervals
                                     FROM %(track)s_replicated_rnaseq_tss_distance
                                     where closest_dist < 1000"""),
         ("lincRNA TSS",
          """SELECT count(distinct gene_id) as intervals
                                     FROM %(track)s_replicated_lncrna_tss_distance
                                     where closest_dist < 1000"""),
     )

     counts = odict()
     for label, statement in labelled_statements:
         try:
             value = self.getValue(statement % locals())
         except Exception:
             # Same best-effort fallback as before, but a bare ``except``
             # would also swallow KeyboardInterrupt and SystemExit.
             value = "0"
         counts[label] = value
     return counts
Esempio n. 47
0
    def __call__(self, track):
        """Summarise the sphinxreport log of ``<track>.dir`` and link to its report.

        :return: odict with an RST ``link`` to the pipeline HTML page and
            the ``error``, ``warning``, ``info`` and ``debug`` attributes
            of the log summary; ``None`` if summarising the log raises
            ``IOError``.
        """

        try:
            logfileresult = summarizeLogFile(
                os.path.join(track + ".dir", "sphinxreport.log"))
        except IOError:
            return

        # ``fn`` and ``track`` are substituted by name below.  The unused
        # ``report_file`` path that used to be built here was dropped.
        fn = os.path.abspath(
            os.path.join(track + ".dir", "report", "html", "pipeline.html"))

        r = odict()
        r["link"] = "`%(track)s <%(fn)s>`_" % locals()
        r["error"] = logfileresult.error
        r["warning"] = logfileresult.warning
        r["info"] = logfileresult.info
        r["debug"] = logfileresult.debug
        return r
Esempio n. 48
0
    def __call__(self, track, slice = None ):

        edir, method = EXPORTDIR, self.method
        rst_text = []
        
        geneset = track
        for level in self.levels:
            for x,y in itertools.combinations( EXPERIMENTS, 2 ):
                filename = "%(edir)s/%(method)s/%(geneset)s_%(method)s_%(level)s_%(x)s_vs_%(y)s_significance.png" % locals()
                if not os.path.exists( filename ): continue

                rst_text.append('''
%(geneset)s %(level)s %(x)s vs %(y)s 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

.. figure:: %(filename)s

''' % locals())
                            
        return odict( (("text", "\n".join( rst_text)),) )
Esempio n. 49
0
    def __call__(self, track, slice = None ):
        edir = EXPORTDIR
        level = "gene"
        geneset = slice
        method = self.method

        filename = "%(edir)s/%(method)s/%(geneset)s_%(method)s_%(level)s_fit_%(track)s.png" % locals()

        # fitting information will not exist if there are no replicates
        if not os.path.exists( filename ): return None

        rst_text = '''
%(level)s %(track)s %(geneset)s
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

.. figure:: %(edir)s/%(method)s/%(geneset)s_%(method)s_%(level)s_fit_%(track)s.png

.. figure:: %(edir)s/%(method)s/%(geneset)s_%(method)s_%(level)s_residuals_%(track)s.png

''' % locals()

        return odict( (("text", rst_text),) )
Esempio n. 50
0
    def __call__(self, track):

        logfileresult = summarizeLogFile(
            os.path.join(track + ".dir", "sphinxreport.log"))
        report_file = os.path.join(track + ".dir", "report.log")

        toc_text = []
        link_text = []

        fn = os.path.join(track + ".dir", "report", "html", "pipeline.html")
        toc_text.append("* %(track)s_" % locals())
        link_text.append(".. _%(track)s: %(fn)s" % locals())

        toc_text = "\n".join(toc_text)
        link_text = "\n".join(link_text)

        rst_text = '''
%(toc_text)s

%(link_text)s
''' % locals()

        return odict((("text", rst_text), ))
Esempio n. 51
0
 def __call__(self, track, slice=None):
     """Return the ``mean`` column of ``<track>_mappability`` under ``'mean'``.

     NOTE(review): the ``track`` placeholder is not filled in here;
     presumably ``self.getValues`` substitutes it - confirm.
     """
     statement = '''SELECT mean FROM %(track)s_mappability'''
     means = self.getValues(statement)
     return odict((('mean', means),))
Esempio n. 52
0
 def __call__(self, track):
     """Return ``sense_nreads`` of the coverage table, ordered by gene id.

     NOTE(review): the ``track`` and ``section`` placeholders are left
     unfilled; presumably ``self.getValues`` substitutes them - confirm.
     """
     statement = '''SELECT sense_nreads 
                           FROM %(track)s_%(section)s_coverage 
                           ORDER BY gene_id'''
     nreads = self.getValues(statement)
     return odict((("nreads", nreads),))
Esempio n. 53
0
 def __call__(self, track):
     """Return the antisense fraction of reads per gene, ordered by gene id.

     The fraction is ``antisense_nreads / (antisense_nreads + sense_nreads)``.

     NOTE(review): the ``track`` and ``section`` placeholders are left
     unfilled; presumably ``self.getValues`` substitutes them - confirm.
     """
     statement = '''SELECT CAST(antisense_nreads AS FLOAT)/ (antisense_nreads + sense_nreads ) AS proportion
                           FROM %(track)s_%(section)s_coverage 
                           ORDER BY gene_id'''
     proportions = self.getValues(statement)
     return odict((("proportion", proportions),))
Esempio n. 54
0
 def __call__(self, track):
     """Return ``nfailed`` values for rows where ``<track>_nh`` is positive.

     NOTE(review): the ``table``, ``track`` and ``limit`` placeholders are
     left unfilled; presumably ``self.getValues`` substitutes them -
     confirm.

     :return: odict with the values stored under *track*.
     """
     statement = '''SELECT nfailed FROM %(table)s WHERE %(track)s_nh > 0 LIMIT %(limit)i'''
     values = self.getValues(statement)
     return odict(((track, values),))
Esempio n. 55
0
 def __call__(self, track, slice=None):
     """Count rows of ``<track>_merged_repeats`` with and without overlap.

     A row counts as "with" when ``nover > 0`` and as "without" when
     ``nover = 0``.

     NOTE(review): the ``track`` placeholder is left unfilled; presumably
     ``self.getFirstRow`` substitutes it - confirm.
     """
     statement = '''SELECT SUM(CASE WHEN nover>0 THEN 1 ELSE 0 END) as with, SUM(CASE WHEN nover=0 THEN 1 ELSE 0 END) AS without
                    FROM %(track)s_merged_repeats '''
     row = self.getFirstRow(statement)
     return odict(zip(("with", "without"), row))
Esempio n. 56
0
 def __call__(self, track, slice=None):
     """Return the first row of ``self.getStatement(track, slice)``.

     :return: odict pairing ``"with"`` and ``"without"`` with the row's
         values, or an empty list when no statement is available.
     """
     statement = self.getStatement(track, slice)
     if statement:
         row = self.getFirstRow(statement)
         return odict(zip(("with", "without"), row))
     return []