Esempio n. 1
0
def _as_track_iterator(source, fields=None):
    """Return a fresh iterator over the features described by *source*.

    A list (or list subclass) is iterated directly.  Anything else is handed
    to ``track.load`` and the features of ``'chr1'`` are read, restricted to
    *fields* when that argument is truthy.
    """
    if isinstance(source, list):
        return iter(source)
    with track.load(source) as loaded:
        if fields:
            features = loaded.read('chr1', fields=fields)
        else:
            features = loaded.read('chr1')
        # Materialise while the track is still open; the iterator is
        # consumed only after the ``with`` block has exited.
        return iter(list(features))


def run_one(case, t):
    """Run one test description and compare its output to the expectation.

    *t* is a dict holding the callable under ``'fn'``, the expected result
    under ``'expected'`` and the inputs either under ``'tracks'`` (a mapping
    of keyword name to list or loadable source, with optional per-name
    ``'fields'``) or under ``t['input']['list_of_tracks']``.  Every source is
    replaced *in place* by an iterator before ``t['fn']`` is called with the
    resulting keyword arguments.  The outcome is checked with
    ``case.assertEqual``.
    """
    if t.get('tracks'):
        fields = t.get('fields') or {}
        # Snapshot the items so values can be reassigned during the loop.
        for name, source in list(t['tracks'].items()):
            t['tracks'][name] = _as_track_iterator(source, fields.get(name))
        kwargs = t.get('input', {})
        kwargs.update(t['tracks'])
    else:
        sources = t['input']['list_of_tracks']
        for index, source in enumerate(sources):
            sources[index] = _as_track_iterator(source)
        kwargs = t['input']
    # Run it #
    case.assertEqual(list(t['fn'](**kwargs)), t['expected'])
Esempio n. 2
0
def fix_sqlite_db(database):
    """Clean up the SQLite track *database* and return a de-duplicated copy.

    Steps, in order:

    1. Delete from ``chrNames`` every name that has no table of the same
       name in *database*.
    2. Copy ``start``/``end``/``score`` of every feature into a temporary
       quantitative track.  Features whose ``strand`` equals 1 have their
       coordinates recomputed relative to ``chrmeta[chrom]``.
    3. Build a second quantitative track that keeps, for each ``start``,
       only the rows carrying the highest score.

    Returns the path of the track built in step 3.
    """
    # Step 1: drop chromosome names that have no backing table.
    db = sqlite3.connect(database)
    try:
        cursor = db.cursor()
        rows = cursor.execute("SELECT name FROM chrNames").fetchall()
        for row in rows:
            name = row[0]
            # The name is a value here, so bind it instead of splicing it
            # into the statement text.
            exist = cursor.execute(
                "SELECT COUNT(name) FROM sqlite_master WHERE type='table' AND name=?;",
                (name,)).fetchone()[0]
            if exist == 0:
                cursor.execute("DELETE FROM chrNames WHERE name=?", (name,))
        db.commit()
        cursor.close()
    finally:
        db.close()
    track_scanned_signal_tmp = unique_filename_in()
    track_scanned_signal = unique_filename_in()
    # Step 2: temporary result, may still contain duplicate elements.
    with new(track_scanned_signal_tmp, format = "sql", datatype= "quantitative") as t:
        with load(database) as b:
            t.chrmeta = b.chrmeta
            for chrom in b:
                for value in b.read(chrom, fields=['start','end','score', 'strand']):
                    if value[3] == 1:
                        # NOTE(review): presumably chrmeta[chrom] is the
                        # chromosome length, so this mirrors the feature
                        # onto the other strand -- confirm.
                        # NOTE(review): this branch passes a flat 3-tuple
                        # while the other passes a 1-tuple wrapping the
                        # slice; confirm which shape ``write`` expects.
                        t.write(chrom, (t.chrmeta[chrom]-(value[1]+1), t.chrmeta[chrom]-(value[0]), value[2],))
                    else:
                        t.write(chrom, (value[0:3],))
    # Step 3: result without duplicates; when several rows share a start,
    # the rows with the highest score are kept.
    with new(track_scanned_signal, format = "sql", datatype= "quantitative") as t:
        with load(database) as b:
            t.chrmeta = b.chrmeta
    db1 = sqlite3.connect(track_scanned_signal)
    db2 = sqlite3.connect(track_scanned_signal_tmp)
    try:
        cursor1 = db1.cursor()
        cursor2 = db2.cursor()
        rows = cursor2.execute("SELECT name FROM chrNames").fetchall()
        for row in rows:
            # Table names cannot be bound as parameters, so the quoted name
            # is spliced into the statements; it comes from chrNames.
            table = "'" + row[0] + "'"
            cursor1.execute("CREATE TABLE " + table + " (start INTEGER, end INTEGER, score REAL);")
            best_rows = cursor2.execute(
                "SELECT t.start, t.end, t.score FROM " + table + " t INNER JOIN ("
                " SELECT start, end, MAX(score) AS MAXSCORE FROM " + table + " GROUP BY start"
                ") groupedt ON t.start=groupedt.start AND t.score=MAXSCORE;"
            ).fetchall()
            cursor1.executemany("INSERT INTO " + table + " VALUES (?,?,?) ", best_rows)
        db1.commit()
        cursor1.close()
        cursor2.close()
    finally:
        # Close both connections; previously db2 was closed twice and db1
        # was never closed.
        db1.close()
        db2.close()
    return track_scanned_signal
Esempio n. 3
0
 def __iter__(self):
     """Iterate over the data selected by ``self.selection``.

     The selection's ``'type'`` decides what is produced:

     * ``'chr'``      -- one ``self.track.read`` result for the chosen chromosome;
     * ``'all'``      -- one read result per entry of ``self.track.chrs``;
     * ``'regions'``  -- one read result per entry of the ``'regions'`` list;
     * ``'trackchr'`` -- every item of ``self.make_overlap`` for the chosen
       chromosome, using the track named by ``request['selected_regions']``;
     * ``'track'``    -- the same, for each entry of ``self.track.chrs``.

     Any other type produces nothing.
     """
     selection = self.selection
     kind = selection['type']
     if kind == 'chr':
         yield self.track.read(selection['chr'], self.fields)
         return
     if kind == 'all':
         for chrom in self.track.chrs:
             yield self.track.read(chrom, self.fields)
         return
     if kind == 'regions':
         for region in selection['regions']:
             yield self.track.read(region, self.fields)
         return
     if kind == 'trackchr':
         with load(self.request['selected_regions'], readonly=True) as regions_track:
             for item in self.make_overlap(regions_track, selection['chr']):
                 yield item
         return
     if kind == 'track':
         with load(self.request['selected_regions'], readonly=True) as regions_track:
             for chrom in self.track.chrs:
                 for item in self.make_overlap(regions_track, chrom):
                     yield item
Esempio n. 4
0
 def runTest(self):
     """Check that ``bool_not`` output keeps the chromosome metadata.

     Runs the ``bool_not`` genomic manipulation on the third "Validation"
     track (BED path), writing into the system temp directory, and asserts
     that the ``chrmeta`` of the first output file equals the ``chrmeta`` of
     the SQL version of that track.  The output file is always removed.
     """
     sql_path = track_collections['Validation'][2]['path_sql']
     bed_path = track_collections['Validation'][2]['path']
     files = run(
        track1           = bed_path,
        track1_name      = 'Validation track two',
        track1_chrs      = yeast_chr_file,
        operation_type   = 'genomic_manip',
        manipulation     = 'bool_not',
        output_location  = tempfile.gettempdir(),
     )
     try:
         with load(sql_path, chrmeta=yeast_chr_file, readonly=True) as sql:
             with load(files[0]) as bed:
                 self.assertEqual(sql.chrmeta, bed.chrmeta)
     finally:
         # Remove the generated file even when loading or the assertion
         # fails, so failed runs do not leave files in the temp directory.
         os.remove(files[0])
Esempio n. 5
0
    def runTest(self):
        """Check the descriptive-statistics characteristics on a known track.

        Loads ``chr1`` of the second "Validation" track, storing the track
        object and the feature list back into the collection entry under
        ``"track"`` and ``"data"``.  Each characteristic function is then
        called with the features reduced to the columns listed in its
        ``fields`` attribute, and the result is compared to a fixed value.
        """
        collection = track_collections["Validation"][1]
        with track.load(collection["path_sql"]) as collection["track"]:
            collection["data"] = list(collection["track"].read("chr1"))
        features = collection["data"]
        characteristic = desc_stat.gmCharacteristic

        # (function under test, expected result) pairs.
        expectations = [
            (characteristic.number_of_features, 12),
            (characteristic.base_coverage, 85),
            (characteristic.length, [10, 6, 10, 5, 5, 10, 10, 10, 10, 20, 10, 10]),
            (characteristic.score, [10.0, 0.0, 10.0, 0.0, 0.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 5.0]),
        ]

        def project(row, fn):
            # Keep only the columns the characteristic declares in ``fields``.
            return [row[track.Track.qualitative_fields.index(name)] for name in fn.fields]

        for fn, expected in expectations:
            projected = [project(row, fn) for row in features]
            self.assertEqual(fn(projected), expected)
Esempio n. 6
0
def track_cut_down(request, track):
    """Yield the ``gmSubtrack`` objects that *request* selects from *track*.

    ``request['selected_regions']`` drives the behaviour:

    * falsy -- no selection: one subtrack of type ``'all'``, or one of type
      ``'chr'`` per entry of ``track.chrs`` when ``request['per_chromosome']``
      is set;
    * a list (or list subclass) -- region selection: subtracks of type
      ``'regions'``; per chromosome, chromosomes without any region are
      skipped;
    * anything else -- track selection: subtracks of type ``'track'`` or
      ``'trackchr'``; per chromosome, chromosomes not contained in the
      loaded selection track are skipped.

    In the two selection cases, a truthy ``request['compare_parents']``
    adds the matching unrestricted subtrack (``'all'`` or ``'chr'``) right
    after each selected one.
    """
    regions = request['selected_regions']
    per_chromosome = request['per_chromosome']
    if not regions:
        #--- NO SELECTION ---#
        if not per_chromosome:
            yield gmSubtrack(request, track, {'type': 'all'})
        else:
            for chrom in track.chrs:
                yield gmSubtrack(request, track, {'type': 'chr', 'chr': chrom})
    elif isinstance(regions, list):
        #--- STRING SELECTION ---#
        if not per_chromosome:
            yield gmSubtrack(request, track, {'type': 'regions', 'regions': regions}, False)
            if request['compare_parents']:
                yield gmSubtrack(request, track, {'type': 'all'}, True)
        else:
            for chrom in track.chrs:
                subregions = [subr for subr in regions if subr['chr'] == chrom]
                if not subregions:
                    continue
                yield gmSubtrack(request, track, {'type': 'regions', 'regions': subregions}, False)
                if request['compare_parents']:
                    yield gmSubtrack(request, track, {'type': 'chr', 'chr': chrom}, True)
    else:
        #--- TRACK SELECTION ---#
        if not per_chromosome:
            yield gmSubtrack(request, track, {'type': 'track', 'track': regions}, False)
            if request['compare_parents']:
                yield gmSubtrack(request, track, {'type': 'all'}, True)
        else:
            with load(regions, readonly=True) as t:
                for chrom in track.chrs:
                    if chrom not in t:
                        continue
                    yield gmSubtrack(request, track, {'type': 'trackchr', 'chr': chrom}, False)
                    if request['compare_parents']:
                        yield gmSubtrack(request, track, {'type': 'chr', 'chr': chrom}, True)
Esempio n. 7
0
def run(**request):
    """Execute the operation described by the keyword arguments.

    ``operation_type`` names an attribute of ``operations`` (imported from
    ``gMiner.operations`` on demand) whose ``run`` function does the work.
    ``output_location`` must name an existing directory.
    ``selected_regions`` and ``wanted_chromosomes`` default to ``''`` and
    are passed through ``parse_regions`` / ``parse_chrlist``.  All tracks
    returned by ``parse_tracks`` are opened read-only, numbered, given their
    final ``chrs`` and handed to the operation.

    Returns whatever the operation's ``run`` returns.
    Raises ``Exception`` when the operation cannot be imported or when the
    output location is missing or is not a directory.
    """
    # Import the correct operation #
    op_name = request['operation_type']
    if not hasattr(operations, op_name):
        try:
            __import__('gMiner.operations.' + op_name)
        except ImportError as err:
            message = "The operation " + op_name + " could not be imported because: " + str(err)
            raise Exception(message)
    run_op = getattr(operations, op_name).run
    # Mandatory request variables #
    output_location = request.get('output_location')
    if not output_location:
        raise Exception("There does not seem to be an output location specified in the request.")
    output_dir = output_location.rstrip('/')
    if not os.path.isdir(output_dir):
        raise Exception("The output location '" + output_dir + "' specified is not a directory.")
    # Optional request variables #
    request.setdefault('selected_regions', '')
    parse_regions(request)
    request.setdefault('wanted_chromosomes', '')
    parse_chrlist(request)
    # Prepare the tracks #
    contexts = []
    for info in parse_tracks(request):
        contexts.append(track.load(info['path'], name=info['name'], chrmeta=info.get('chrs'), readonly=True))
    with nested(*contexts) as tracks:
        # Assign numbers #
        for number, opened in enumerate(tracks):
            opened.number = number
        # Determine final chromosome list #
        wanted = request['wanted_chromosomes']
        for opened in tracks:
            if wanted:
                opened.chrs = set(opened.all_chrs) & set(wanted)
            else:
                opened.chrs = opened.all_chrs
        # Run it #
        return run_op(request, tracks, output_dir)
Esempio n. 8
0
def create_bins(X, num_of_bins=10):
    """Split every feature of *X* into ``num_of_bins`` consecutive bins.

    Each feature ``x`` is indexed as ``x[0]`` (start), ``x[1]`` (end) and
    ``x[2]``..``x[4]`` (copied unchanged into every bin).  For each feature
    the generator yields ``num_of_bins`` tuples
    ``(bin_start, bin_end, x[2], x[3], x[4])`` of equal width.

    Note: the width is computed with ``/``; on Python 2 integer coordinates
    therefore use floor division and the bins may stop short of ``x[1]``.
    """
    for x in X:
        length = (x[1] - x[0]) / num_of_bins
        # ``range`` rather than ``xrange``: the latter exists only on
        # Python 2, and the bin count is small enough for either.
        for i in range(num_of_bins):
            yield (x[0] + i * length, x[0] + (i + 1) * length, x[2], x[3], x[4])

from bbcflib import track
from gMiner.operations.genomic_manip.scores import mean_score_by_feature
# Score every bin of the second track with the mean score of the first one
# and store the result, chromosome by chromosome, in a new track.
manip = mean_score_by_feature()
with track.load('/scratch/genomic/tracks/pol2.sql') as a:
    with track.load('/scratch/genomic/tracks/ribosome_proteins.sql') as b:
        with track.new('/tmp/manual.sql') as r:
            for chrom in a:
                scores = a.read(chrom)
                bins = create_bins(b.read(chrom))
                r.write(chrom, manip(scores, bins))
            # Metadata is set once all chromosomes have been written.
            r.meta_chr = a.meta_chr
            r.meta_track = {
                'datatype': 'qualitative',
                'name': 'Mean score per bin',
                'created_by': 'gMiner example script',
            }
Esempio n. 9
0
def run_request(case, t):
    """Run a gMiner request and compare ``chr1`` of its output to a reference.

    ``t['kwargs']`` is passed to ``gMiner.run``; the first returned file is
    loaded, its ``chr1`` features are read into a list, and the file is
    removed.  The list is then compared to ``t['expected']`` with
    ``case.assertEqual``.
    """
    files = gMiner.run(**t['kwargs'])
    try:
        with track.load(files[0], chrmeta=yeast_chr_file) as x:
            data = list(x.read('chr1'))
    finally:
        # Remove the output even if loading or reading it fails.
        os.remove(files[0])
    case.assertEqual(data, t['expected'])