コード例 #1
ファイル: genrep.py プロジェクト: bbcf/pygdv
 def adn(self, ass, chr, id, **kw):
     """Return the sequence of chunk number ``id`` on chromosome ``chr``.

     :param ass: assembly identifier, passed to ``Assembly``.
     :param chr: chromosome name; compared with the ``'name'`` entry of each
         chromosome of the assembly.
     :param id: zero-based chunk index (any value ``int()`` accepts).
     :param kw: extra keyword arguments; ignored.
     :returns: whatever ``GenRep.get_sequence`` returns for the coordinates
         ``[id * chunk, id * chunk + chunk]``, or ``''`` when the assembly has
         no chromosome with that name.
     :raises ValueError, TypeError: if ``id`` cannot be converted by ``int()``.
     """
     index = int(id)
     g = GenRep()
     # The earlier ``get_genrep_objects`` lookup was dropped: its result was
     # never read (the loop variable overwrote it immediately).
     assembly = Assembly(ass)
     # ``chunk`` is a module-level name defined outside this method.
     for chrid, meta in assembly.chromosomes.iteritems():
         if meta['name'] == chr:
             start = index * chunk
             end = start + chunk
             # chrid[0]: first element of the chromosome key
             # (presumably the GenRep chromosome id -- TODO confirm).
             return g.get_sequence(chrid[0], [[start, end]])
     return ''
コード例 #3
ファイル: test_genrep.py プロジェクト: MolbioUnige/bbcflib
 def setUp(self):
     # Test fixture: creates the 'ce6' Assembly and a chromosome table and
     # stores them on the test case as self.assembly / self.chromosomes.
     self.assembly = Assembly('ce6')
     # NOTE(review): this call is not closed in the visible text; its remaining
     # argument(s) and closing parenthesis appear to be missing.
     self.assembly.genrep = GenRep(url='http://bbcftools.epfl.ch/genrep/',
     self.assembly.intype = '0'
     # Table keyed by 3-tuples (integer, accession string, integer); each value
     # is a dict with 'length' and 'name'. Presumably the expected content of
     # the assembly's chromosomes -- verify against the tests that use it.
     # NOTE(review): the closing "}," of every entry and the final "}" of the
     # table are not visible here.
     self.chromosomes = {
         (3066, u'NC_003279', 6): {
             'length': 15072421,
             'name': u'chrI'
         (3067, u'NC_003280', 7): {
             'length': 15279323,
             'name': u'chrII'
         (3068, u'NC_003281', 8): {
             'length': 13783681,
             'name': u'chrIII'
         (3069, u'NC_003282', 5): {
             'length': 17493785,
             'name': u'chrIV'
         (3070, u'NC_003283', 8): {
             'length': 20919568,
             'name': u'chrV'
         (3071, u'NC_003284', 7): {
             'length': 17718854,
             'name': u'chrX'
         (2948, u'NC_001328', 1): {
             'length': 13794,
             'name': u'chrM'
コード例 #4
ファイル: dnaseseq.py プロジェクト: MolbioUnige/bbcflib
import os, sys, gzip, tarfile
from bbcflib.gfminer.stream import neighborhood, segment_features, score_by_feature
from bbcflib.gfminer.common import fusion, sorted_stream
from bbcflib.gfminer.figure import lineplot
from bbcflib.common import set_file_descr, unique_filename_in, intersect_many_bed, cat, merge_sql
from bbcflib.chipseq import add_macs_results
from bbcflib.mapseq import merge_bam, index_bam
from bbcflib.track import track, convert, FeatureStream
from bbcflib.genrep import GenRep
from bbcflib.motif import save_motif_profile
from bein import program
from bein.util import touch
from numpy import vstack, zeros

# Module-level GenRep instance, built with default arguments at import time.
_gnrp = GenRep()
# Presumably the flank size used around MACS peaks -- usage is not visible
# here; confirm the unit (bp?) where it is read.
_macs_flank = 300
# Pair of flank values, presumably (upstream, downstream) for plots -- TODO confirm.
_plot_flank = (50, 50)

def macs_bedfiles(ex, chrmeta, tests, controls, names, macs_args, via,
    missing_beds = [k for k, t in enumerate(tests) if not t[0]]
    if not missing_beds: return tests
    genome_size = sum([x['length'] for x in chrmeta.values()])
    logfile.write("Running MACS.\n")
    _tts = [tests[k][1] for k in missing_beds]
    _nms = {
        'tests': [names['tests'][k] for k in missing_beds],
        'controls': names['controls']
コード例 #5
ファイル: run_scanning.py プロジェクト: hjanime/bbcfutils
def main(argv = None):
    # Command-line entry point. Reads options and a config file, then, for
    # every run found in the job's groups: converts the track to sql, obtains
    # a scanned track plus an FDR score from sqlite_to_false_discovery_rate,
    # writes a track filtered on that score, optionally copies the results to
    # a remote host with scp, and registers both tracks in GDV.
    #
    # NOTE(review): the text of this function has gaps -- several `try:` /
    # `else:` lines, call arguments and closing parentheses are not visible,
    # so it does not parse as shown. The notes below mark each gap; confirm
    # every one against the original file before relying on this code.
    # NOTE(review): the next line reads like docstring text whose quotes are
    # missing.
    Entry point when program start
    # Defaults. Values are filled in from the command line first and from the
    # job options of the config file afterwards.
    genrep              = None
    assembly            = None
    lims                = None
    job                 = None
    config              = None
    config_file         = None
    background          = ""
    matrix              = ""
    original_sql_data   = ""
    random_sql_data     = ""
    track_filtered      = ""
    track_scanned       = ""
    project             = ""
    username            = ""
    identity_file       = ""
    host                = ""
    website             = ""
    remote_path         = ""
    result_path         = ""
    track_regions_path  = ""
    via                 = ""
    limspath            = ""
    fdr                 = 0
    runs                = {}
    if argv is None:
        argv = sys.argv
        # NOTE(review): the `try:` matching the `except getopt.error` below is
        # not visible, and the getopt call is never closed (the list brackets
        # around the long options are missing too).
        # NOTE(review): getopt expects a long option that takes a value to be
        # spelled "name=" with no spaces; the strings below contain " = " and
        # one line contains a literal `******` run -- confirm against the
        # original file.
            opts, args = getopt.getopt   (
                                            argv[1:],"hu:c:"  ,
                                                "help", "via = ", "host = "     ,
                                                "remote_path = " , "website = " ,
                                                "minilims = ","config = "       ,
                                                "matrix = ", "username = "******"identity_file = ", "project = "
        # Python 2 `except X, name` syntax.
        except getopt.error, msg:
            raise Usage(msg)
        for option, value in opts:
            if option in ("-h", "--help"):
                print __doc__
                print USAGE
            elif option == "--via":
                if value == "local":
                    via = "local"
                elif value == "lsf":
                    via = "lsf"
                    # NOTE(review): as written this raise sits inside the "lsf"
                    # branch; an `else:` line appears to be missing before it.
                    # The message mentions "-u", which the short-option string
                    # declares but no branch of this chain handles.
                    raise Usage("Via (-u) can only be \"local\" or \"lsf\", got %s." % (value,))
            elif option == "--website":
                website = normalize_url(value)
            elif option == "--minilims":
                limspath = normcase(expanduser(value))
            elif option == "--host":
                host = value
            elif option == "--identity_file":
                identity_file = value
            elif option == "--remote_path":
                remote_path = normcase(expanduser(value))
                # Guarantee a trailing path separator.
                if not remote_path.endswith(sep):
                    remote_path += sep
            elif option == "--matrix":
                # matrix becomes a one-entry dict: {file name: normalized path}.
                matrix = {basename(value):normcase(expanduser(value))}
            elif option == "--username":
                username = value
            elif option == "--project":
                project = value
            elif option in ("-c", "--config"):
                config_file = normcase(expanduser(value))
                # NOTE(review): an `else:` line appears to be missing before
                # this raise; as written it fires for -c/--config itself.
                raise Usage("Unhandled option: " + option)

        # read config file
        if config_file is None or not exists(config_file) or not isfile(config_file):
            raise Usage("Config file missing")
            # NOTE(review): unreachable as written (follows the raise in the
            # same block); an `else:` line appears to be missing.
            job, config = parseConfig(normcase(expanduser(config_file)))

        # Anything not given on the command line falls back to job.options.
        if project == "":
            project = job.description
        if matrix == "":
            if "matrix" in job.options:
                path = normcase(expanduser(job.options["matrix"]))
                matrix = {basename(path): path}
                # NOTE(review): `else:` appears to be missing before this raise.
                raise Usage("You need give value matrix file ")
        if limspath == "":
            if "minilims" in job.options:
                limspath = job.options["minilims"]
                # NOTE(review): `else:` appears to be missing before this raise.
                raise Usage("You need give value minilims path/name")
        if via == "":
            if "via" in job.options:
                via = job.options["via"]
                # NOTE(review): `else:` appears to be missing; as written the
                # configured value is overwritten with "lsf" straight away.
                via = "lsf"
        if host == "" and "host" in job.options:
            host = job.options["host"]
        if identity_file == "" and "identity_file" in job.options:
            identity_file = job.options["identity_file"]
        if remote_path == "" and "remote_path" in job.options:
            remote_path = job.options["remote_path"]
        if username == "" and "username" in job.options:
            username = job.options["username"]
        if website == "" and "website" in job.options:
            website = job.options["website"]

        genrep      = GenRep(config = config)
        assembly    = genrep.assembly(job.assembly_id)
        lims        = MiniLIMS(limspath)
        # NOTE(review): this call is not closed in the visible text; further
        # arguments may be missing.
        json        = create_gdv_project(
                                            config["gdv"]["key"], config["gdv"]["email"],
                                            public = True
        project_id  = get_project_id( json )
        # compute false discovery rate
        with execution(lims, description = job.description) as ex:
            # NOTE(review): call not closed; leading arguments are not visible.
            background = genrep.statistics  (
                                                output = unique_filename_in(),
                                                frequency = True,
                                                matrix_format = True
            if len(job.groups) >2:
                raise ValueError("They are more than 2 group in config file")

            # Build `runs`: run id -> {"name", "control", "experimental"},
            # filled from every run entry that carries a "url".
            for group_number in job.groups:
                group = job.groups[group_number]
                for run_number in group["runs"]:
                    run_iter = job.groups[group_number]["runs"][run_number]
                    if "url" in run_iter:
                        url = run_iter["url"]
                        uri = ""
                        if run_iter["run"] not in runs:
                            runs[run_iter["run"]] = {"name":None, "control":None, "experimental":None}
                        if url.startswith("http") or url.startswith("www."):
                            url = normalize_url(url)
                            # download data
                            data    = urllib2.urlopen(url)
                            uri     = unique_filename_in()
                            # NOTE(review): the body of this `with` (presumably
                            # writing `data` to the file) and the `else:` before
                            # the local-path assignment are not visible.
                            with open(uri, "w") as opening_file:
                            uri = normcase(expanduser(url))
                        if group["control"]:
                            runs[run_iter["run"]]["control"]   = uri
                            # NOTE(review): an `else:` appears to be missing
                            # before the next two assignments.
                            runs[run_iter["run"]]["name"]      = basename(uri)
                            runs[run_iter["run"]]["experimental"] = uri

            for run in runs:
                current_run         = runs[run]
                # Fresh names for the intermediate and output files of this run.
                original_sql_data   = unique_filename_in()
                random_sql_data     = unique_filename_in()
                track_filtered      = unique_filename_in()
                logging.info( "[%s]" % job.description )
                logging.info( "alias %s => %s" % (current_run["experimental"], track_filtered) )

                # convert data to sql
                with Track(current_run["experimental"], chrmeta = assembly.chromosomes) as track:
                    # Get the sqlite file if it is not already in this format
                    # NOTE(review): this `or` chain is true for every value of
                    # track.format (no value equals all three strings), so the
                    # branch is always taken; `and` / `not in` was probably
                    # intended. An `else:` also appears to be missing before
                    # the second assignment.
                    if track.format != "sql" or track.format != "db" or track.format != "sqlite":
                        track.convert(original_sql_data, format = "sql")
                        original_sql_data = current_run["experimental"]
                    # Generate a random population from the original if none is given in the config file
                    if current_run["control"] is None:
                        # create random track
                        track.shuffle_track(random_sql_data, repeat_number = 5)
                        # NOTE(review): an `else:` appears to be missing here;
                        # as written the control track is opened only when
                        # current_run["control"] is None.
                        with Track(current_run["control"], chrmeta = assembly.chromosomes) as track_random:
                            # Get the sqlite file if it is not already in this format
                            # NOTE(review): same always-true `or` chain as above.
                            if track_random.format != "sql" or \
                                track_random.format != "db" or \
                                track_random.format != "sqlite":
                                track_random.convert(random_sql_data, format = "sql")
                                random_sql_data = current_run["control"]
                # NOTE(review): call not closed; its leading positional
                # arguments are not visible.
                track_scanned, fdr, p_value = sqlite_to_false_discovery_rate(
                                                                                threshold = -100,
                                                                                via = via,
                                                                                keep_max_only = False,
                                                                                alpha = 0.05,
                                                                                nb_sample = 5.0

                # filter track with fdr as threshold
                with new(track_filtered, format = "sql", datatype = "qualitative") as track_out:
                    chromosome_used     = {}
                    track_out.meta_track = {"source": basename(current_run["experimental"])}
                    with Track(track_scanned, format = "sql", chrmeta = assembly.chromosomes) as track_in:
                        # chromosome name -> {"length": ...}
                        meta = dict([(v["name"], dict([("length", v["length"])])) for v in track_in.chrmeta.values()])
                        for chromosome in track_in.all_chrs:
                            data_list = []
                            # Reads this chromosome with a score selection of
                            # (fdr, sys.maxsize).
                            # NOTE(review): the read call is not closed and
                            # nothing visible appends to data_list; the line
                            # doing so appears to be missing.
                            for data in track_in.read   (
                                                            {"chr": chromosome, "score": (fdr, sys.maxsize)},
                                                            fields = Track.qualitative_fields
                                chromosome_used[chromosome] = meta[chromosome]
                            if len(data_list) > 0:
                                track_out.write(chromosome, data_list)
                        track_out.chrmeta = chromosome_used
                ex.add(track_filtered,      "sql: filtred %s" % track_filtered)
                logging.info( "scanned: %s" % track_scanned )
                logging.info( "score selected: %f with p: %.3f" % (fdr, p_value) )
                logging.info( "filtred: %s" % track_filtered )

                # fix track
                track_scanned_signal = fix_sqlite_db(track_scanned)
                logging.info( "scanned signal: %s" % track_scanned_signal )
                ex.add(track_scanned_signal, description="%s: sql track signal %s" % (job.description, track_scanned_signal))

                # send filtered track and scanned track to remote
                if host != "" and remote_path != "" and username != "":
                    args = []
                    if identity_file != "":
                        args = ["-i", normcase(expanduser(identity_file)), "-C" ]
                    source_filtred      = normcase(expanduser(track_filtered))
                    source_scanned      = normcase(expanduser(track_scanned_signal))
                    # A separator is inserted between remote_path and the file
                    # name here, in addition to the trailing one the
                    # --remote_path option handler appends.
                    result_destination          = "%s@%s:%s%s%s.db" % (username, host, remote_path, sep, track_filtered)
                    result_path                 = "%s%s%s.db" % (website, sep, track_filtered)
                    track_regions_destination   = "%s@%s:%s%s%s.db" % (username, host, remote_path, sep, track_scanned_signal)
                    track_regions_path          = "%s%s%s.db" % (website, sep, track_scanned_signal)
                    scp(ex, source_filtred, result_destination, args = args)
                    scp(ex, source_scanned, track_regions_destination, args = args)
                    # NOTE(review): an `else:` appears to be missing; as written
                    # this overwrites the website path computed above.
                    result_path = track_filtered
                # Send to GDV filtered track
                # NOTE(review): neither add_gdv_track call is closed in the
                # visible text.
                add_gdv_track  (
                                    config["gdv"]["key"], config["gdv"]["email"],
                                    project_id, result_path,
                                    name    = "filtred_%s" % (splitext( basename( current_run["experimental"] ) )[0]),
                                    gdv_url = config["gdv"]["url"]
                # Send to GDV scanned track
                add_gdv_track  (
                                    config["gdv"]["key"], config["gdv"]["email"],
                                    project_id, track_regions_path,
                                    name    = "regions_%s" % (splitext( basename( current_run["experimental"] ) )[0]),
                                    gdv_url = config["gdv"]["url"]
                logging.info( "++++++++++++")
            logging.info( "-------------------END--------------------")