Python cobble Examples, bbcflib.gfminer.common.cobble Python Examples

Example #1

0

Show file

File: test_gfminer.py Project: JoseEspinosa/bbcflib

    def test_cobble(self): # more tests below
        """Check `cobble` in its default, stranded and scored modes.

        Three overlapping 'chr1' features named A, B and C are streamed to
        `cobble`; the pieces it yields are compared with hand-written
        expectations for each mode.
        """
        base_fields = ['chr','start','end','name','strand']
        overlapping = [('chr1',10,20,'A',1),('chr1',12,22,'B',-1),('chr1',15,25,'C',-1)]

        # Default mode: every piece carries the '|'-joined names of the
        # features overlapping it.
        pieces = list(cobble(fstream(list(overlapping), fields=list(base_fields))))
        self.assertEqual(pieces, [('chr1',10,12,'A',1),
                                  ('chr1',12,15,'A|B',0),
                                  ('chr1',15,20,'A|B|C',0),
                                  ('chr1',20,22,'B|C',-1),
                                  ('chr1',22,25,'C',-1)])

        # stranded = True
        pieces = list(cobble(fstream(list(overlapping), fields=list(base_fields)),
                             stranded=True))
        self.assertEqual(pieces, [('chr1',10,20,'A',1),
                                  ('chr1',12,15,'B',-1),
                                  ('chr1',15,22,'B|C',-1),
                                  ('chr1',22,25,'C',-1)])

        # scored = True (feature C is longer here and every feature has a score)
        scored_features = [('chr1',10,20,'A',1,50.0),
                           ('chr1',12,22,'B',-1,100.0),
                           ('chr1',15,65,'C',-1,20.0)]
        pieces = list(cobble(fstream(scored_features, fields=base_fields+['score']),
                             scored=True))
        self.assertEqual(pieces, [('chr1',10,12,'A',1, 10.0),
                                  ('chr1',12,15,'A|B',0, 45.0),
                                  ('chr1',15,20,'A|B|C',0, 77.0),
                                  ('chr1',20,22,'B|C',-1, 20.8),
                                  ('chr1',22,65,'C',-1, 17.2)])

Example #2

0

Show file

File: test_gfminer.py Project: MolbioUnige/bbcflib

    def test_cobble(self):  # more tests below
        """Run `cobble` over overlapping features in three modes.

        The cases are listed as (rows, fields, cobble options, expected) and
        checked in order: default call, ``stranded=True``, ``scored=True``.
        """
        names = ['chr', 'start', 'end', 'name', 'strand']
        cases = [
            # default call
            ([('chr1', 10, 20, 'A', 1), ('chr1', 12, 22, 'B', -1),
              ('chr1', 15, 25, 'C', -1)],
             list(names),
             {},
             [('chr1', 10, 12, 'A', 1), ('chr1', 12, 15, 'A|B', 0),
              ('chr1', 15, 20, 'A|B|C', 0), ('chr1', 20, 22, 'B|C', -1),
              ('chr1', 22, 25, 'C', -1)]),
            # stranded = True
            ([('chr1', 10, 20, 'A', 1), ('chr1', 12, 22, 'B', -1),
              ('chr1', 15, 25, 'C', -1)],
             list(names),
             {'stranded': True},
             [('chr1', 10, 20, 'A', 1), ('chr1', 12, 15, 'B', -1),
              ('chr1', 15, 22, 'B|C', -1), ('chr1', 22, 25, 'C', -1)]),
            # scored = True
            ([('chr1', 10, 20, 'A', 1, 50.0),
              ('chr1', 12, 22, 'B', -1, 100.0),
              ('chr1', 15, 65, 'C', -1, 20.0)],
             names + ['score'],
             {'scored': True},
             [('chr1', 10, 12, 'A', 1, 10.0),
              ('chr1', 12, 15, 'A|B', 0, 45.0),
              ('chr1', 15, 20, 'A|B|C', 0, 77.0),
              ('chr1', 20, 22, 'B|C', -1, 20.8),
              ('chr1', 22, 65, 'C', -1, 17.2)]),
        ]
        for rows, fields, options, expected in cases:
            res = list(cobble(fstream(rows, fields=fields), **options))
            self.assertEqual(res, expected)

Example #3

0

Show file

def combine(trackList, fn, win_size=1000, aggregate=None):
    """
    Applies a custom function to a list of tracks, such as union, intersection,
    etc., and return a single result track. The input streams need to be ordered
    w.r.t 'chr', 'start' and 'end'. To be applied chromosome by chromosome.

    Only fields of the first track are kept. Values for a common field are
    merged by default according to `common.strand_merge`,`common.no_merge` and `common.generic_merge`,
    respectively for strand, chromosome and all others.

    :param trackList: list of FeatureStream objects.
    :param fn: boolean function to apply, such as bbcflib.gfminer.stream.union.
        A string is also accepted and is evaluated to obtain the function.
    :param win_size: (int) window size, in bp.
    :param aggregate: (dict) for each field name given as a key, its value is the function
        to apply to the vector containing all trackList's values for this field in order
        to merge them. E.g. ``{'score': lambda x: sum(x)/len(x)}`` will return the average of
        all *trackList*'s scores in the output. Defaults to no custom function;
        the dictionary passed in is not modified.
    :rtype: FeatureStream
    """
    # Work on a private copy: a mutable `{}` default would be shared between
    # calls, and `setdefault` below would otherwise write into the caller's dict.
    aggregate = dict(aggregate) if aggregate else {}
    aggregate.setdefault('strand',common.strand_merge)
    aggregate.setdefault('chr',common.no_merge)
    _f = ['start','end']
    if all('chr' in t.fields for t in trackList):
        _f += ['chr']
    # NOTE(review): `eval` runs arbitrary code; only pass trusted strings as `fn`.
    if isinstance(fn,str): fn = eval(fn) # can type "combine(...,fn='intersection')"
    trackList = [common.cobble(common.reorder(t,fields=_f)) for t in trackList]
    return common.fusion(FeatureStream(_combine(trackList,fn,win_size,aggregate),
                                       fields=trackList[0].fields))

Example #4

0

Show file

File: find_motifs.py Project: delafont/random_scripts

def fimo(motifs,fasta,qval=True):
    """Run the `fimo` command on `motifs` and `fasta`, then write 'fimo.bed'.

    With `qval` true the command gets ``--thresh 0.01 --qv-thresh``, otherwise
    ``--thresh 0.000001``. The file 'fimo_out/fimo.txt' is sorted numerically
    on columns 2, 3 and 4 into 'fimo.txt', which is then read as a track,
    reduced to six fields, cobbled and written to 'fimo.bed'. The 'fimo_out'
    directory is removed at the end if it exists.
    """
    def _second_token(chrom):
        # Keep the second '|'-separated token of the chromosome field.
        return chrom.split('|')[1]

    def _unique_names(joined):
        # Drop repeated names from a '|'-joined list.
        return '|'.join(set(joined.split('|')))

    # Run Fimo
    options = ("--max-stored-scores 1000000 --verbosity 1 --thresh 0.01 --qv-thresh"
               if qval else
               "--max-stored-scores 1000000 --verbosity 1 --thresh 0.000001")
    arguments = " %s %s" % (motifs, fasta)
    cmd = "fimo " + options + arguments
    print("Running >> " + cmd)
    os.system(cmd)
    os.system("sort -k2,2n -k3,3n -k4,4n fimo_out/fimo.txt > fimo.txt")

    # Bed output
    t = track('fimo.txt', fields=["name","chr","start","end","strand","score","p-value","q-value","sequence"])
    # NOTE(review): the field names are replaced right after opening the
    # track; presumably the second list is the one matching the file - confirm.
    t.fields = ["name","chr","start","end","strand","a","score","q","sequence"]
    hits = select(t.read(),['chr','start','end','name','score','strand'])
    hits = apply(hits,'chr',_second_token)
    hits = cobble(sorted_stream(hits))
    hits = apply(hits,'name',_unique_names)
    bed = track('fimo.bed',fields=hits.fields)
    bed.make_header(name="TSS_motifs", description="Motifs +-XKb around TSS", mode='overwrite')
    bed.write(hits)
    if os.path.exists("fimo_out"):
        shutil.rmtree("fimo_out")

Example #5

0

Show file

def fimo(motifs, fasta, qval=True):
    """Scan `fasta` with the `fimo` command for `motifs`; output 'fimo.bed'.

    `qval` selects the threshold options given to `fimo` (``--thresh 0.01
    --qv-thresh`` when true, ``--thresh 0.000001`` otherwise). The sorted
    copy of 'fimo_out/fimo.txt' is loaded as a track, restricted to the
    fields chr, start, end, name, score, strand, cobbled, and saved as
    'fimo.bed'. Finally the 'fimo_out' directory is deleted when present.
    """
    # Run Fimo
    options = "--max-stored-scores 1000000 --verbosity 1 --thresh 0.000001"
    if qval:
        options = "--max-stored-scores 1000000 --verbosity 1 --thresh 0.01 --qv-thresh"
    cmd = "fimo " + options + (" %s %s" % (motifs, fasta))
    print("Running >> " + cmd)
    for shell_line in (
            cmd,
            "sort -k2,2n -k3,3n -k4,4n fimo_out/fimo.txt > fimo.txt"):
        os.system(shell_line)

    # Bed output
    declared_fields = [
        "name", "chr", "start", "end", "strand", "score", "p-value",
        "q-value", "sequence"
    ]
    t = track('fimo.txt', fields=declared_fields)
    # NOTE(review): the names are overwritten immediately after the track is
    # opened; presumably this list reflects the real column order - confirm.
    t.fields = [
        "name", "chr", "start", "end", "strand", "a", "score", "q", "sequence"
    ]
    kept = ['chr', 'start', 'end', 'name', 'score', 'strand']
    stream = select(t.read(), kept)
    # Keep only the second '|'-separated token of the chromosome field.
    stream = apply(stream, 'chr', lambda chrom: chrom.split('|')[1])
    stream = cobble(sorted_stream(stream))
    # Remove duplicated names from the '|'-joined name field.
    stream = apply(stream, 'name',
                   lambda joined: '|'.join(set(joined.split('|'))))
    bed = track('fimo.bed', fields=stream.fields)
    bed.make_header(name="TSS_motifs",
                    description="Motifs +-XKb around TSS",
                    mode='overwrite')
    bed.write(stream)
    if os.path.exists("fimo_out"):
        shutil.rmtree("fimo_out")

Example #6

0

Show file

File: VennDiagram.py Project: bbcf/bsPlugins

    def __call__(self, **kw):
        """
        Compute the per-group statistics of a Venn diagram and write the outputs.

        Keyword arguments read from `kw`:

        * ``input_type``: "Table" (default) or "Tracks".
        * Table mode: ``table`` (file name), ``id_columns`` (comma-separated
          1-based column numbers) and ``filters`` (comma-separated conditions,
          one per column, e.g. ">2 AND <5"; missing ones default to true).
          For every row, `c1` counts the group made of exactly the columns
          whose condition holds and `c2` counts every combination of columns
          whose conditions all hold.
        * Tracks mode: ``files`` (one file name or a list of them) and ``type``
          ('score' to accumulate scores instead of covered lengths). Lengths
          are converted to percentages of the total coverage.
        * ``output``: format of the diagram file ('pdf' by default).

        A text summary is always registered through `self.new_file`; the
        diagram itself is only drawn for at most 4 samples.

        :raises ValueError: if ``input_type`` is neither "Table" nor "Tracks".
        """

        def _parse_logic(string):
            # Turn e.g. ">2 AND <5" into the template "(%f >2)and(%f <5)".
            s = re.sub(r'[^\w\d!=><\. ]', '', string)
            s = re.sub(r' OR ', ')or(%f ', s)
            s = re.sub(r' AND ', ')and(%f ', s)
            return "(%f "+s+")"

        def _run_test(row, indx, cond):
            # Evaluate condition `cond` on the value of the indx-th selected column.
            num = float(row[col_ind[indx]])
            num = max(-sys.maxint,min(sys.maxint,num))
            # Use the `cond` argument itself: the previous code read a variable
            # of the enclosing scope that was only bound through Python 2
            # list-comprehension leakage.
            num = (num,)*cond.count("%f")
            # NOTE(review): `eval` of a user-supplied filter. `_parse_logic`
            # strips most punctuation but still lets identifiers through.
            return eval(cond % (num))

        def _add_label(s,x):
            # Append the constant `x` to every item of `s` as field 'track_name'.
            _f = s.fields+['track_name']
            return FeatureStream((y+(x,) for y in s), fields=_f)

        venn_options = {} # tune it here
        tracks = []
        intype = kw.get("input_type") or "Table"
        if intype == "Table":
            s_cols = kw.get('id_columns','')
            s_filters = kw.get('filters','')
            infile = track(kw.get('table',''),format='txt',header=True)
            col_ind = [int(i)-1 for i in s_cols.split(",")]
            legend = [infile.fields[i] if i<len(infile.fields) else str(i) for i in col_ind]
            conds = [_parse_logic(x) for x in s_filters.split(",")]
            tlabels = [chr(k+65) for k in range(len(col_ind))]
            conds += ["1"]*(len(col_ind)-len(conds))
            combn = [tuple(sorted(x)) for k in range(len(tlabels))
                     for x in combinations(tlabels,k+1)]
            c1 = dict(("|".join(c),0) for c in combn)
            c2 = dict(("|".join(c),0) for c in combn)
            indx = dict((c,[tlabels.index(x) for x in c]) for c in combn)
            for row in infile:
                tests = [_run_test(row,i,cond) for i,cond in enumerate(conds)]
                # Exclusive group of this row: counted once per row (it used to
                # be incremented once per combination), and skipped when no
                # test passes (the empty key used to raise KeyError).
                passed = "|".join([tlabels[n] for n,t in enumerate(tests) if t])
                if passed: c1[passed] += 1
                for c in combn:
                    c2["|".join(c)] += all([tests[i] for i in indx[c]])
            nsamples = len(col_ind)
            # One key per combination; iterating inside each tuple would
            # flatten the groups back into single labels.
            combn = ['|'.join(x) for x in combn]
        elif intype == "Tracks":
            #filenames = kw['TrMulti']['files']
            filenames = kw['files']
            if not isinstance(filenames,(list,tuple)): filenames = [filenames]
            for f in filenames: assert os.path.exists(f), "File not found: %s ." % f
            tracks = [track(f,chrmeta='guess') for f in filenames]
            nsamples = len(tracks)
            tlabels = [chr(k+65) for k in range(len(tracks))]
            combn = [combinations(tlabels,k+1) for k in range(len(tlabels))]
            combn = ['|'.join(sorted(y)) for x in combn for y in x]
            c1 = dict(zip(combn,[0]*len(combn)))
            c2 = dict(zip(combn,[0]*len(combn)))
            total_cov = 0.0
            _scored = (kw.get('type') == 'score')
            chromset = set([c for t in tracks for c in t.chrmeta])
            for chrom in chromset:
                # Label each track's features, then cut them into pieces whose
                # 'track_name' lists the tracks overlapping the piece.
                streams = [_add_label(t.read(chrom),tlabels[n]) for n,t in enumerate(tracks)]
                s = cobble(concatenate(streams),scored=_scored)
                name_idx = s.fields.index('track_name')
                start_idx = s.fields.index('start')
                end_idx = s.fields.index('end')
                if _scored: score_idx = s.fields.index('score')
                for x in s:
                    length = x[end_idx]-x[start_idx]
                    total_cov += length
                    sub = sorted(list(set(x[name_idx].split('|')))) # avoid 'A|A'
                    cb = [combinations(sub,k) for k in range(1,len(sub)+1)]
                    cb = ['|'.join(sorted(y)) for c in cb for y in c]
                    if _scored:
                        c1['|'.join(sub)] += x[score_idx]
                        for c in cb: c2[c] += x[score_idx]
                    else:
                        c1['|'.join(sub)] += length
                        for c in cb: c2[c] += length
            if total_cov < 1:
                output = self.temporary_path(fname='venn_summary.txt')
                # Text mode, like the regular summary written further down.
                with open(output,'w') as summary:
                    summary.write("Empty content (no coverage) on %s." %(",".join(chromset)))
                self.new_file(output, 'venn_summary')
                return
            legend = [t.name for t in tracks]
            if _scored:
                for c in combn:
                    c2[c] = round(c2[c])
            else:
                # Express lengths as percentages of the total coverage.
                for c in combn:
                    c2[c] = round((100*c2[c])/total_cov)
                    c1[c] = (100*c1[c])/total_cov
        else:
            raise ValueError("Input type '%s' not supported." %intype)


        if nsamples <= 4:
            out_format = kw.get('output') or 'pdf'
            output = self.temporary_path(fname='venn_diagram.'+out_format)
            venn(c2,legend=legend,options=venn_options,output=output,format=out_format)
            self.new_file(output, 'venn_diagram')

        # Text summary
        output = self.temporary_path(fname='venn_summary.txt')
        with open(output,'w') as summary:
            summary.write("%s\t%s\t%s\n" % ("Group","Coverage", "Cumulative coverage"))
            record = "%s\t%.2f\t%d\n"
            for c in sorted(combn, key=lambda x:(len(x),x)):
                summary.write(record%(c,c1[c],c2[c]))
        self.new_file(output, 'venn_summary')
        return self.display_time()

Example #7

0

Show file

File: test_gfminer.py Project: MolbioUnige/bbcflib

 def commonTest(self, X, R):
     """Cobble the features `X` and assert that the result equals `R`.

     `X` is streamed with the fields chr, start, end and score; the cobbled
     list is printed before the comparison.
     """
     stream = fstream(X, fields=['chr', 'start', 'end', 'score'])
     cobbled = list(cobble(stream))
     print(cobbled)
     self.assertEqual(cobbled, R)

Example #8

0

Show file

    def __call__(self, **kw):
        """
        Compute the per-group statistics of a Venn diagram and write the outputs.

        Keyword arguments read from `kw`:

        * ``input_type``: "Table" (default) or "Tracks".
        * Table mode: ``table`` (file name), ``id_columns`` (comma-separated
          1-based column numbers) and ``filters`` (comma-separated conditions,
          one per column, e.g. ">2 AND <5"; missing ones default to true).
          For every row, `c1` counts the group made of exactly the columns
          whose condition holds and `c2` counts every combination of columns
          whose conditions all hold.
        * Tracks mode: ``TrMulti`` (mapping whose ``files`` entry is one file
          name or a list of them) and ``type`` ('score' to accumulate scores
          instead of covered lengths). Lengths are converted to percentages
          of the total coverage.
        * ``format``: format of the diagram file ('pdf' by default).

        A text summary is always registered through `self.new_file`; the
        diagram itself is only drawn for at most 4 samples.

        :raises ValueError: if ``input_type`` is neither "Table" nor "Tracks".
        """
        def _parse_logic(string):
            # Turn e.g. ">2 AND <5" into the template "(%f >2)and(%f <5)".
            s = re.sub(r'[^\w\d!=><\. ]', '', string)
            s = re.sub(r' OR ', ')or(%f ', s)
            s = re.sub(r' AND ', ')and(%f ', s)
            return "(%f " + s + ")"

        def _run_test(row, indx, cond):
            # Evaluate condition `cond` on the value of the indx-th selected
            # column.
            num = float(row[col_ind[indx]])
            num = max(-sys.maxint, min(sys.maxint, num))
            # Use the `cond` argument itself: the previous code read a variable
            # of the enclosing scope that was only bound through Python 2
            # list-comprehension leakage.
            num = (num, ) * cond.count("%f")
            # NOTE(review): `eval` of a user-supplied filter. `_parse_logic`
            # strips most punctuation but still lets identifiers through.
            return eval(cond % (num))

        def _add_label(s, x):
            # Append the constant `x` to every item of `s` as field
            # 'track_name'.
            _f = s.fields + ['track_name']
            return FeatureStream((y + (x, ) for y in s), fields=_f)

        venn_options = {}  # tune it here
        tracks = []
        intype = kw.get("input_type") or "Table"
        if intype == "Table":
            s_cols = kw.get('id_columns', '')
            s_filters = kw.get('filters', '')
            infile = track(kw.get('table', ''), format='txt', header=True)
            col_ind = [int(i) - 1 for i in s_cols.split(",")]
            legend = [
                infile.fields[i] if i < len(infile.fields) else str(i)
                for i in col_ind
            ]
            conds = [_parse_logic(x) for x in s_filters.split(",")]
            tlabels = [chr(k + 65) for k in range(len(col_ind))]
            conds += ["1"] * (len(col_ind) - len(conds))
            combn = [
                tuple(sorted(x)) for k in range(len(tlabels))
                for x in combinations(tlabels, k + 1)
            ]
            c1 = dict(("|".join(c), 0) for c in combn)
            c2 = dict(("|".join(c), 0) for c in combn)
            indx = dict((c, [tlabels.index(x) for x in c]) for c in combn)
            for row in infile:
                tests = [
                    _run_test(row, i, cond) for i, cond in enumerate(conds)
                ]
                # Exclusive group of this row: counted once per row (it used to
                # be incremented once per combination), and skipped when no
                # test passes (the empty key used to raise KeyError).
                passed = "|".join(
                    [tlabels[n] for n, t in enumerate(tests) if t])
                if passed:
                    c1[passed] += 1
                for c in combn:
                    c2["|".join(c)] += all([tests[i] for i in indx[c]])
            nsamples = len(col_ind)
            # One key per combination; iterating inside each tuple would
            # flatten the groups back into single labels.
            combn = ['|'.join(x) for x in combn]
        elif intype == "Tracks":
            filenames = kw['TrMulti']['files']
            if not isinstance(filenames, (list, tuple)):
                filenames = [filenames]
            for f in filenames:
                assert os.path.exists(f), "File not found: %s ." % f
            tracks = [track(f, chrmeta='guess') for f in filenames]
            nsamples = len(tracks)
            tlabels = [chr(k + 65) for k in range(len(tracks))]
            combn = [combinations(tlabels, k + 1) for k in range(len(tlabels))]
            combn = ['|'.join(sorted(y)) for x in combn for y in x]
            c1 = dict(zip(combn, [0] * len(combn)))
            c2 = dict(zip(combn, [0] * len(combn)))
            total_cov = 0.0
            _scored = (kw.get('type') == 'score')
            chromset = set([c for t in tracks for c in t.chrmeta])
            for chrom in chromset:
                # Label each track's features, then cut them into pieces whose
                # 'track_name' lists the tracks overlapping the piece.
                streams = [
                    _add_label(t.read(chrom), tlabels[n])
                    for n, t in enumerate(tracks)
                ]
                s = cobble(concatenate(streams), scored=_scored)
                name_idx = s.fields.index('track_name')
                start_idx = s.fields.index('start')
                end_idx = s.fields.index('end')
                if _scored: score_idx = s.fields.index('score')
                for x in s:
                    length = x[end_idx] - x[start_idx]
                    total_cov += length
                    sub = sorted(list(set(
                        x[name_idx].split('|'))))  # avoid 'A|A'
                    cb = [combinations(sub, k) for k in range(1, len(sub) + 1)]
                    cb = ['|'.join(sorted(y)) for c in cb for y in c]
                    if _scored:
                        c1['|'.join(sub)] += x[score_idx]
                        for c in cb:
                            c2[c] += x[score_idx]
                    else:
                        c1['|'.join(sub)] += length
                        for c in cb:
                            c2[c] += length
            if total_cov < 1:
                output = self.temporary_path(fname='venn_summary.txt')
                # Text mode, like the regular summary written further down.
                with open(output, 'w') as summary:
                    summary.write("Empty content (no coverage) on %s." %
                                  (",".join(chromset)))
                self.new_file(output, 'venn_summary')
                return
            legend = [t.name for t in tracks]
            if _scored:
                for c in combn:
                    c2[c] = round(c2[c])
            else:
                # Express lengths as percentages of the total coverage.
                for c in combn:
                    c2[c] = round((100 * c2[c]) / total_cov)
                    c1[c] = (100 * c1[c]) / total_cov
        else:
            raise ValueError("Input type '%s' not supported." % intype)

        if nsamples <= 4:
            out_format = kw.get('format') or 'pdf'
            output = self.temporary_path(fname='venn_diagram.' + out_format)
            venn(c2,
                 legend=legend,
                 options=venn_options,
                 output=output,
                 format=out_format)
            self.new_file(output, 'venn_diagram')

        # Text summary
        output = self.temporary_path(fname='venn_summary.txt')
        with open(output, 'w') as summary:
            summary.write("%s\t%s\t%s\n" %
                          ("Group", "Coverage", "Cumulative coverage"))
            record = "%s\t%.2f\t%d\n"
            for c in sorted(combn, key=lambda x: (len(x), x)):
                summary.write(record % (c, c1[c], c2[c]))
        self.new_file(output, 'venn_summary')
        return self.display_time()

Example #9

0

Show file

File: test_gfminer.py Project: JoseEspinosa/bbcflib

 def commonTest(self,X,R):
     """Cobble the features `X` and assert that the result equals `R`.

     `X` is streamed with the fields chr, start, end and score; the cobbled
     list is printed before the comparison.
     """
     score_stream = fstream(X,fields=['chr','start','end','score'])
     result = list(cobble(score_stream))
     print(result)
     self.assertEqual(result,R)