コード例 #1
0
ファイル: dbsnp.py プロジェクト: ashwinkalbhor/cloudbiolinux
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Fetch one VCF from the Broad resource bundle as a bgzipped, indexed file.

    An already present uncompressed copy is bgzipped and indexed in place;
    otherwise the file is downloaded, recompressed with bgzip and indexed
    with tabix. Leftover uncompressed ``.vcf``/``.vcf.idx`` files are removed.

    :param gid: genome build identifier, also used as the remote directory
    :param bundle_version: bundle version directory on the FTP server
    :param name: resource base name (for example one starting with "dbsnp")
    :param ext: extra suffix appended after ".vcf" in the remote file name
    :returns: local file name of the bgzipped VCF
    """
    # The bundle only uses ".sites" in hg19 file names of non-dbSNP resources.
    if gid == "hg19" and not name.startswith("dbsnp"):
        sites_tag = ".sites"
    else:
        sites_tag = ""
    remote_name = "{0}.{1}{2}.vcf{3}".format(name, gid, sites_tag, ext)
    # Local name drops the build and ".sites" markers.
    local_gz = remote_name.replace(".{0}".format(gid), "")
    local_gz = local_gz.replace(".sites", "") + ".gz"
    remote_url = ("ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/" +
                  "{0}/{1}/{2}.gz".format(bundle_version, gid, remote_name))
    local_vcf = local_gz.replace(".vcf.gz", ".vcf")
    # Reuse an existing uncompressed copy when there is one.
    if env.safe_exists(local_vcf):
        env.safe_run("bgzip %s" % local_vcf)
        env.safe_run("tabix -f -p vcf %s" % local_gz)
    # Otherwise download, convert gzip -> bgzip and index.
    if not env.safe_exists(local_gz):
        fetched = shared._remote_fetch(env, remote_url)
        env.safe_run("gunzip -c %s | bgzip -c > %s" % (fetched, local_gz))
        env.safe_run("tabix -f -p vcf %s" % local_gz)
        env.safe_run("rm -f %s" % fetched)
    # Remove stale uncompressed files and their indexes.
    for suffix in (".vcf", ".vcf.idx"):
        stale = local_gz.replace(".vcf.gz", suffix)
        if env.safe_exists(stale):
            env.safe_run("rm -f %s" % stale)
    return local_gz
コード例 #2
0
def _determine_distribution(env):
    """Guess the distribution label of the target machine.

    The lower-cased output of ``cat /etc/*release`` is scanned for known
    markers in a fixed order; Debian and Mac OS X are recognised through
    marker files instead.

    :param env: environment object providing ``safe_run_output`` and
        ``safe_exists``
    :returns: one of "ubuntu", "centos", "suse", "scientificlinux",
        "debian", "arch" or "macosx"
    :raises Exception: when no known distribution is detected
    """
    with quiet():
        release_text = env.safe_run_output("cat /etc/*release").lower()
    # Ordered (marker, label) pairs; the first marker found decides.
    release_markers = (
        ("distrib_id=ubuntu", "ubuntu"),
        ("centos release", "centos"),
        ("centos linux release", "centos"),
        ("red hat", "centos"),
        ("fedora release", "centos"),
        ("amzn", "centos"),  # Amazon AMIs are Red-Hat based
        ("suse linux", "suse"),
        ("opensuse", "suse"),
        ("scientific linux", "scientificlinux"),
    )
    for marker, label in release_markers:
        if marker in release_text:
            return label
    if env.safe_exists("/etc/debian_version"):
        return "debian"
    if "id=arch" in release_text:
        return "arch"
    # File used by Python's platform.mac_ver
    if env.safe_exists("/System/Library/CoreServices/SystemVersion.plist"):
        return "macosx"
    raise Exception("Attempt to automatically determine Linux distribution of target machine failed, please manually specify distribution in fabricrc.txt")
コード例 #3
0
ファイル: dbsnp.py プロジェクト: Altoros/cloudbiolinux
def _download_dbnsfp(env, gid, gconfig):
    """Download and prepare dbNSFP functional prediction resources if configured.

    Feeds into VEP for annotating VCF files:
    https://sites.google.com/site/jpopgen/dbNSFP
    https://github.com/ensembl-variation/VEP_plugins/blob/master/dbNSFP.pm
    """
    version = "2.5"
    url = "http://dbnsfp.houstonbioinformatics.org/dbNSFPzip/dbNSFPv%s.zip" % version
    if gconfig.get("dbnsfp"):
        outfile = "dbNSFP_v%s.gz" % (version)
        if gid == "GRCh37":  # download and prepare bgzipped output file
            if not env.safe_exists(outfile):
                zipfile = shared._remote_fetch(env, url, samedir=True)
                outdir = "dbNSFPv%s" % version
                env.safe_run("mkdir -p %s" % outdir)
                env.safe_run("unzip %s -d %s" % (zipfile, outdir))
                env.safe_run("cat %s/dbNSFP*_variant.chr* | bgzip -c > %s" % (outdir, outfile))
                env.safe_run("rm -f %s/* && rmdir %s" % (outdir, outdir))
                env.safe_run("rm -f %s" % (zipfile))
            if not env.safe_exists(outfile + ".tbi"):
                env.safe_run("tabix -s 1 -b 2 -e 2 -c '#' %s" % outfile)
        elif gid == "hg19":  # symlink to GRCh37 download
            if not env.safe_exists(outfile):
                env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (outfile, outfile))
            if not env.safe_exists(outfile + ".tbi"):
                env.safe_run("ln -sf ../../GRCh37/variation/%s.tbi %s.tbi" % (outfile, outfile))
コード例 #4
0
ファイル: dbsnp.py プロジェクト: ashwinkalbhor/cloudbiolinux
def _download_qsignature(env, gid, gconfig):
    """Download qsignature position file to detect samples problems

    :param env
    :param gid: str genome id
    :param gconfig: 

    :returns: NULL
    """
    base_url = "http://downloads.sourceforge.net/project/adamajava/qsignature.tar.bz2"
    if gid == "GRCh37":
        outfile = "qsignature.vcf"
        if not env.safe_exists(outfile):
            zipfile = shared._remote_fetch(env, base_url, samedir=True)
            outdir = "qsignature"
            env.safe_run("mkdir -p %s" % outdir)
            env.safe_run("tar -jxf %s -C %s" % (zipfile, outdir))
            env.safe_run("mv %s/qsignature_positions.txt %s" %
                         (outdir, outfile))
            env.safe_run("rm -rf %s" % outdir)
            env.safe_run("rm -rf %s" % zipfile)
    elif gid == "hg19":  # symlink to GRCh37 download
        outfile = os.path.basename(base_url)
        if not env.safe_exists(outfile):
            env.safe_run("ln -sf ../../GRCh37/variation/%s %s" %
                         (outfile, outfile))
コード例 #5
0
ファイル: dbsnp.py プロジェクト: glebkuznetsov/cloudbiolinux
def _download_dbnsfp(env, gid, gconfig):
    """Download and prepare dbNSFP functional prediction resources if configured.

    Feeds into VEP for annotating VCF files:
    https://sites.google.com/site/jpopgen/dbNSFP
    https://github.com/ensembl-variation/VEP_plugins/blob/master/dbNSFP.pm
    """
    version = "2.8"
    url = "https://onedrive.live.com/download?cid=0D359D171E382137&resid=D359D171E382137%2154761&authkey=AFm7prRqSLLLC9g"
    dl_file = "dbNSFPv%s.zip" % version
    if gconfig.get("dbnsfp"):
        outfile = "dbNSFP_v%s.gz" % (version)
        if gid == "GRCh37" or (gid == "hg19" and not env.safe_exists("../../GRCh37")):
            if not env.safe_exists(outfile):
                zipfile = shared._remote_fetch(env, url, out_file=dl_file, samedir=True)
                outdir = "dbNSFPv%s" % version
                env.safe_run("mkdir -p %s" % outdir)
                env.safe_run("unzip %s -d %s" % (zipfile, outdir))
                env.safe_run("cat %s/dbNSFP*_variant.chr* | bgzip -c > %s" % (outdir, outfile))
                env.safe_run("rm -f %s/* && rmdir %s" % (outdir, outdir))
                env.safe_run("rm -f %s" % (zipfile))
            if not env.safe_exists(outfile + ".tbi"):
                env.safe_run("tabix -s 1 -b 2 -e 2 -c '#' %s" % outfile)
        elif gid == "hg19":  # symlink to GRCh37 download
            if not env.safe_exists(outfile):
                env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (outfile, outfile))
            if not env.safe_exists(outfile + ".tbi"):
                env.safe_run("ln -sf ../../GRCh37/variation/%s.tbi %s.tbi" % (outfile, outfile))
コード例 #6
0
ファイル: dbsnp.py プロジェクト: ashwinkalbhor/cloudbiolinux
def _download_dbnsfp(env, gid, gconfig):
    """Download and prepare dbNSFP functional prediction resources if configured.

    Feeds into VEP for annotating VCF files:
    https://sites.google.com/site/jpopgen/dbNSFP
    https://github.com/ensembl-variation/VEP_plugins/blob/master/dbNSFP.pm
    """
    version = "2.6"
    url = "http://dbnsfp.houstonbioinformatics.org/dbNSFPzip/dbNSFPv%s.zip" % version
    if gconfig.get("dbnsfp"):
        outfile = "dbNSFP_v%s.gz" % (version)
        if gid == "GRCh37":  # download and prepare bgzipped output file
            if not env.safe_exists(outfile):
                zipfile = shared._remote_fetch(env, url, samedir=True)
                outdir = "dbNSFPv%s" % version
                env.safe_run("mkdir -p %s" % outdir)
                env.safe_run("unzip %s -d %s" % (zipfile, outdir))
                env.safe_run("cat %s/dbNSFP*_variant.chr* | bgzip -c > %s" %
                             (outdir, outfile))
                env.safe_run("rm -f %s/* && rmdir %s" % (outdir, outdir))
                env.safe_run("rm -f %s" % (zipfile))
            if not env.safe_exists(outfile + ".tbi"):
                env.safe_run("tabix -s 1 -b 2 -e 2 -c '#' %s" % outfile)
        elif gid == "hg19":  # symlink to GRCh37 download
            if not env.safe_exists(outfile):
                env.safe_run("ln -sf ../../GRCh37/variation/%s %s" %
                             (outfile, outfile))
            if not env.safe_exists(outfile + ".tbi"):
                env.safe_run("ln -sf ../../GRCh37/variation/%s.tbi %s.tbi" %
                             (outfile, outfile))
コード例 #7
0
def _determine_distribution(env):
    """Guess the distribution label of the target machine.

    The lower-cased output of ``cat /etc/*release`` is scanned for known
    markers in a fixed order; Debian and Mac OS X are recognised through
    marker files instead.

    :param env: environment object providing ``safe_run_output`` and
        ``safe_exists``
    :returns: one of "ubuntu", "centos", "scientificlinux", "debian" or
        "macosx"
    :raises Exception: when no known distribution is detected
    """
    with quiet():
        release_text = env.safe_run_output("cat /etc/*release").lower()
    # Ordered (marker, label) pairs; the first marker found decides.
    release_markers = (
        ("distrib_id=ubuntu", "ubuntu"),
        ("centos release", "centos"),
        ("red hat enterprise linux server release", "centos"),
        ("scientific linux release", "scientificlinux"),
    )
    for marker, label in release_markers:
        if marker in release_text:
            return label
    if env.safe_exists("/etc/debian_version"):
        return "debian"
    # File used by Python's platform.mac_ver
    if env.safe_exists("/System/Library/CoreServices/SystemVersion.plist"):
        return "macosx"
    raise Exception(
        "Attempt to automatically determine Linux distribution of target machine failed, please manually specify distribution in fabricrc.txt"
    )
コード例 #8
0
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Fetch one VCF from the Broad resource bundle as a bgzipped, indexed file.

    An already present uncompressed copy is bgzipped and indexed in place;
    otherwise a download is attempted. A failed download is only logged as a
    warning. Leftover uncompressed ``.vcf``/``.vcf.idx`` files are removed.

    :param gid: genome build identifier, also used as the remote directory
    :param bundle_version: bundle version directory on the FTP server
    :param name: resource base name
    :param ext: extra suffix appended after ".vcf" in the remote file name
    :returns: local file name of the bgzipped VCF (returned even when the
        download was not available)
    """
    remote_name = "{0}.{1}.vcf{2}".format(name, gid, ext)
    # Local name drops the build and ".sites" markers.
    local_gz = remote_name.replace(".{0}".format(gid), "")
    local_gz = local_gz.replace(".sites", "") + ".gz"
    remote_url = ("ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/" +
                  "{0}/{1}/{2}.gz".format(bundle_version, gid, remote_name))
    local_vcf = local_gz.replace(".vcf.gz", ".vcf")
    # Reuse an existing uncompressed copy when there is one.
    if env.safe_exists(local_vcf):
        env.safe_run("bgzip %s" % local_vcf)
        env.safe_run("tabix -f -p vcf %s" % local_gz)
    # Otherwise download, convert gzip -> bgzip and index.
    if not env.safe_exists(local_gz):
        fetched = shared._remote_fetch(env, remote_url, allow_fail=True)
        if not fetched:
            env.logger.warn("dbSNP resources not available for %s" % gid)
        else:
            env.safe_run("gunzip -c %s | bgzip -c > %s" % (fetched, local_gz))
            env.safe_run("tabix -f -p vcf %s" % local_gz)
            env.safe_run("rm -f %s" % fetched)
    # Remove stale uncompressed files and their indexes.
    for suffix in (".vcf", ".vcf.idx"):
        stale = local_gz.replace(".vcf.gz", suffix)
        if env.safe_exists(stale):
            env.safe_run("rm -f %s" % stale)
    return local_gz
コード例 #9
0
ファイル: dbsnp.py プロジェクト: ashwinkalbhor/cloudbiolinux
def _download_cosmic(gid):
    """Prepared versions of COSMIC, pre-sorted and indexed.
    utils/prepare_cosmic.py handles the work of creating the VCFs from standard
    COSMIC resources.
    """
    base_url = "https://s3.amazonaws.com/biodata/variants"
    version = "v68"
    supported = ["hg19", "GRCh37"]
    if gid in supported:
        url = "%s/cosmic-%s-%s.vcf.gz" % (base_url, version, gid)
        fname = os.path.basename(url)
        if not env.safe_exists(fname):
            shared._remote_fetch(env, url)
        if not env.safe_exists(fname + ".tbi"):
            shared._remote_fetch(env, url + ".tbi")
コード例 #10
0
ファイル: dbsnp.py プロジェクト: Altoros/cloudbiolinux
def _download_cosmic(gid):
    """Prepared versions of COSMIC, pre-sorted and indexed.
    utils/prepare_cosmic.py handles the work of creating the VCFs from standard
    COSMIC resources.
    """
    base_url = "https://s3.amazonaws.com/biodata/variants"
    version = "v68"
    supported = ["hg19", "GRCh37"]
    if gid in supported:
        url = "%s/cosmic-%s-%s.vcf.gz" % (base_url, version, gid)
        fname = os.path.basename(url)
        if not env.safe_exists(fname):
            shared._remote_fetch(env, url)
        if not env.safe_exists(fname + ".tbi"):
            shared._remote_fetch(env, url + ".tbi")
コード例 #11
0
ファイル: dbsnp.py プロジェクト: Altoros/cloudbiolinux
def _download_ancestral(env, gid, gconfig):
    """Download ancestral genome sequence for loss of function evaluation.

    Used by LOFTEE VEP plugin: https://github.com/konradjk/loftee
    """
    base_url = "http://www.broadinstitute.org/~konradk/loftee/human_ancestor.fa.rz"
    if gid == "GRCh37":
        for ext in ["", ".fai"]:
            outfile = os.path.basename(base_url) + ext
            if not env.safe_exists(outfile):
                shared._remote_fetch(env, base_url + ext, samedir=True)
    elif gid == "hg19":  # symlink to GRCh37 download
        for ext in ["", ".fai"]:
            outfile = os.path.basename(base_url) + ext
            if not env.safe_exists(outfile):
                env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (outfile, outfile))
コード例 #12
0
ファイル: __init__.py プロジェクト: rmlawton/cloudbiolinux
def _connect_native_packages(env, pkg_install, lib_install):
    """Connect native installed packages to local versions.

    This helps setup a non-sudo environment to handle software
    that needs a local version in our non-root directory tree.
    """
    bin_dir = os.path.join(env.system_install, "bin")
    path = env.safe_run_output("echo $PATH")
    comment_line = "# CloudBioLinux PATH updates"
    if not env.safe_contains(env.shell_config, comment_line):
        env.safe_append(env.shell_config, "\n" + comment_line)
    if bin_dir not in path and env.safe_exists(env.shell_config):
        add_path = "export PATH=%s:$PATH" % bin_dir
        if not env.safe_contains(env.shell_config, add_path):
            env.safe_append(env.shell_config, add_path)
    ldlib_path = os.path.join(env.system_install, "lib")
    add_ldlibrary = "export LD_LIBRARY_PATH=%s:$LD_LIBRARY_PATH" % ldlib_path
    if not env.safe_contains(env.shell_config, add_ldlibrary):
        env.safe_append(env.shell_config, add_ldlibrary)
    perl_export = (
        "export PERL5LIB=%s/lib/perl5:%s/lib/perl5/site_perl:${PERL5LIB}" %
        (env.system_install, env.system_install))
    if not env.safe_contains(env.shell_config, perl_export):
        env.safe_append(env.shell_config, perl_export)
    if "python" in pkg_install and "python" in lib_install:
        _create_local_virtualenv(env.system_install)
コード例 #13
0
ファイル: dbsnp.py プロジェクト: remiolsen/cloudbiolinux
def _download_ancestral(env, gid, gconfig):
    """Download ancestral genome sequence for loss of function evaluation.

    Used by LOFTEE VEP plugin: https://github.com/konradjk/loftee
    """
    base_url = "http://www.broadinstitute.org/~konradk/loftee/human_ancestor.fa.rz"
    if gid == "GRCh37":
        for ext in ["", ".fai"]:
            outfile = os.path.basename(base_url) + ext
            if not env.safe_exists(outfile):
                shared._remote_fetch(env, base_url + ext, samedir=True)
    elif gid == "hg19":  # symlink to GRCh37 download
        for ext in ["", ".fai"]:
            outfile = os.path.basename(base_url) + ext
            if not env.safe_exists(outfile):
                env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (outfile, outfile))
コード例 #14
0
ファイル: __init__.py プロジェクト: PeterTonoli/cloudbiolinux
def _connect_native_packages(env, pkg_install, lib_install):
    """Connect native installed packages to local versions.

    This helps setup a non-sudo environment to handle software
    that needs a local version in our non-root directory tree.
    """
    bin_dir = os.path.join(env.system_install, "bin")
    path = env.safe_run_output("echo $PATH")
    comment_line = "# CloudBioLinux PATH updates"
    if not env.safe_contains(env.shell_config, comment_line):
        env.safe_append(env.shell_config, "\n" + comment_line)
    if bin_dir not in path and env.safe_exists(env.shell_config):
        add_path = "export PATH=%s:$PATH" % bin_dir
        if not env.safe_contains(env.shell_config, add_path):
            env.safe_append(env.shell_config, add_path)
    ldlib_path = os.path.join(env.system_install, "lib")
    add_ldlibrary = "export LD_LIBRARY_PATH=%s:$LD_LIBRARY_PATH" % ldlib_path
    if not env.safe_contains(env.shell_config, add_ldlibrary):
        env.safe_append(env.shell_config, add_ldlibrary)
    perl_export = ("export PERL5LIB=%s/lib/perl5:%s/lib/perl5/site_perl:${PERL5LIB}"
                   % (env.system_install, env.system_install))
    if not env.safe_contains(env.shell_config, perl_export):
        env.safe_append(env.shell_config, perl_export)
    if "python" in pkg_install and "python" in lib_install:
        _create_local_virtualenv(env.system_install)
コード例 #15
0
ファイル: dbsnp.py プロジェクト: Altoros/cloudbiolinux
def _download_background_vcf(gid):
    """Download background file of variant to use in calling.
    """
    base_url = "https://s3.amazonaws.com/biodata/variants"
    base_name = "background-diversity-1000g.vcf"
    if gid in ["GRCh37"] and not env.safe_exists("{0}.gz".format(base_name)):
        for ext in ["gz", "gz.tbi"]:
            shared._remote_fetch(env, "{0}/{1}.{2}".format(base_url, base_name, ext))
コード例 #16
0
ファイル: dbsnp.py プロジェクト: remiolsen/cloudbiolinux
def _download_background_vcf(gid):
    """Download background file of variant to use in calling.
    """
    base_url = "https://s3.amazonaws.com/biodata/variants"
    base_name = "background-diversity-1000g.vcf"
    if gid in ["GRCh37"] and not env.safe_exists("{0}.gz".format(base_name)):
        for ext in ["gz", "gz.tbi"]:
            shared._remote_fetch(env, "{0}/{1}.{2}".format(base_url, base_name, ext))
コード例 #17
0
ファイル: dbsnp.py プロジェクト: caddymob/cloudbiolinux
def _download_cosmic(gid):
    """Prepared versions of COSMIC, pre-sorted and indexed.
    utils/prepare_cosmic.py handles the work of creating the VCFs from standard
    COSMIC resources.
    """
    base_url = "https://s3.amazonaws.com/biodata/variants"
    version = "v67_20131024"
    supported = ["hg19", "GRCh37"]
    if gid in supported:
        url = "%s/cosmic-%s-%s.vcf.gz" % (base_url, version, gid)
        gzip_fname = os.path.basename(url)
        fname = os.path.splitext(gzip_fname)[0]
        if not env.safe_exists(fname):
            if not env.safe_exists(gzip_fname):
                shared._remote_fetch(env, url)
            env.safe_run("gunzip %s" % fname)
        if not env.safe_exists(fname + ".idx"):
            shared._remote_fetch(env, url.replace(".gz", ".idx"))
コード例 #18
0
ファイル: dbsnp.py プロジェクト: rmlawton/cloudbiolinux
def _dbsnp_custom(env, gid):
    """Retrieve resources for dbsnp builds from custom S3 biodata bucket.
    """
    remote_dir = "https://s3.amazonaws.com/biodata/variants/"
    files = {"mm10": ["mm10-dbSNP-2013-09-12.vcf.gz"], "canFam3": ["canFam3-dbSNP-2014-04-10.vcf.gz"]}
    for f in files[gid]:
        for ext in ["", ".tbi"]:
            fname = f + ext
            if not env.safe_exists(fname):
                shared._remote_fetch(env, "%s%s" % (remote_dir, fname))
コード例 #19
0
def r_library_installer(config):
    """Install R libraries using CRAN and Bioconductor.

    A temporary ``install_packages.R`` script is assembled piece by piece
    from ``config`` ("cranrepo", "biocrepo", "cran", optional "bioc" and
    "update_packages"), executed with Rscript under sudo when Rscript can be
    found, and removed afterwards.
    """
    def _r_string_vector(pkgs):
        # Render package names as the body of an R character vector.
        return ", ".join('"%s"' % pkg for pkg in pkgs)

    # Start from an empty script file.
    script = "install_packages.R"
    if env.safe_exists(script):
        env.safe_run("rm -f %s" % script)
    env.safe_run("touch %s" % script)
    site_library = os.path.join(env.system_install, "lib", "R", "site-library")
    env.safe_sudo("mkdir -p %s" % site_library)
    # Library location and repositories.
    env.safe_append(script, """
    .libPaths(c("%s"))
    library(methods)
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (site_library, config["cranrepo"], config["biocrepo"]))
    # Installer factory, optionally preceded by an update of installed packages.
    if config.get("update_packages", True):
        updater = """
        update.packages(lib.loc="%s", repos=repos, ask=FALSE)
        """ % site_library
    else:
        updater = "\n"
    installer_template = """
    repo.installer <- function(repos, install.fn) {
      %s
      maybe.install <- function(pname) {
        if (!(pname %%in%% installed.packages()))
          install.fn(pname)
      }
    }
    """
    env.safe_append(script, installer_template % updater)
    # CRAN packages.
    env.safe_append(script, """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages)
    lapply(std.pkgs, std.installer)
    """ % (_r_string_vector(config['cran'])))
    # Bioconductor packages, when any are requested.
    if len(config.get("bioc", [])) > 0:
        env.safe_append(script, """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite)
        lapply(bioc.pkgs, bioc.installer)
        """ % (_r_string_vector(config['bioc'])))
    # Run the script and then get rid of it.
    rscript = fabutils.find_cmd(env, "Rscript", "--version")
    if not rscript:
        env.logger.warn("Rscript not found; skipping install of R libraries.")
    else:
        env.safe_sudo("%s %s" % (rscript, script))
    env.safe_run("rm -f %s" % script)
コード例 #20
0
ファイル: libraries.py プロジェクト: rmlawton/cloudbiolinux
def r_library_installer(config):
    """Install R libraries using CRAN and Bioconductor.

    A temporary ``install_packages.R`` script is assembled piece by piece
    from ``config`` ("cranrepo", "biocrepo", "cran", optional "bioc" and
    "update_packages"), executed with Rscript under sudo when Rscript can be
    found, and removed afterwards.
    """
    def _r_string_vector(pkgs):
        # Render package names as the body of an R character vector.
        return ", ".join('"%s"' % pkg for pkg in pkgs)

    # Start from an empty script file.
    script = "install_packages.R"
    if env.safe_exists(script):
        env.safe_run("rm -f %s" % script)
    env.safe_run("touch %s" % script)
    site_library = os.path.join(env.system_install, "lib", "R", "site-library")
    env.safe_sudo("mkdir -p %s" % site_library)
    # Library location and repositories.
    env.safe_append(script, """
    .libPaths(c("%s"))
    library(methods)
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (site_library, config["cranrepo"], config["biocrepo"]))
    # Installer factory, optionally preceded by an update of installed packages.
    if config.get("update_packages", True):
        updater = """
        update.packages(lib.loc="%s", repos=repos, ask=FALSE)
        """ % site_library
    else:
        updater = "\n"
    installer_template = """
    repo.installer <- function(repos, install.fn) {
      %s
      maybe.install <- function(pname) {
        if (!(pname %%in%% installed.packages()))
          install.fn(pname)
      }
    }
    """
    env.safe_append(script, installer_template % updater)
    # CRAN packages.
    env.safe_append(script, """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages)
    lapply(std.pkgs, std.installer)
    """ % (_r_string_vector(config['cran'])))
    # Bioconductor packages, when any are requested.
    if len(config.get("bioc", [])) > 0:
        env.safe_append(script, """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite)
        lapply(bioc.pkgs, bioc.installer)
        """ % (_r_string_vector(config['bioc'])))
    # Run the script and then get rid of it.
    rscript = fabutils.find_cmd(env, "Rscript", "--version")
    if not rscript:
        env.logger.warn("Rscript not found; skipping install of R libraries.")
    else:
        env.safe_sudo("%s %s" % (rscript, script))
    env.safe_run("rm -f %s" % script)
コード例 #21
0
ファイル: dbsnp.py プロジェクト: remiolsen/cloudbiolinux
def _dbsnp_custom(env, gid):
    """Retrieve resources for dbsnp builds from custom S3 biodata bucket.
    """
    remote_dir = "https://s3.amazonaws.com/biodata/variants/"
    files = {"mm10": ["mm10-dbSNP-2013-09-12.vcf.gz"],
             "canFam3": ["canFam3-dbSNP-2014-05-10.vcf.gz"]}
    for f in files[gid]:
        for ext in ["", ".tbi"]:
            fname = f + ext
            if not env.safe_exists(fname):
                shared._remote_fetch(env, "%s%s" % (remote_dir, fname))
コード例 #22
0
ファイル: dbsnp.py プロジェクト: caddymob/cloudbiolinux
def _dbsnp_mouse(env, gid):
    """Retrieve resources for mouse variant analysis from custom S3 biodata bucket.
    """
    remote_dir = "https://s3.amazonaws.com/biodata/variants/"
    files = {"mm10": ["mm10-dbSNP-2013-09-12.vcf"]}
    for f in files[gid]:
        for ext in ["", ".idx"]:
            fname = f + ext
            if not env.safe_exists(fname):
                out_file = shared._remote_fetch(env, "%s%s.gz" % (remote_dir, fname))
                env.safe_run("gunzip %s" % out_file)
コード例 #23
0
ファイル: dbsnp.py プロジェクト: JCVI-Cloud/cloudbiolinux
def _dbsnp_mouse(env, gid):
    """Retrieve resources for mouse variant analysis from custom S3 biodata bucket.
    """
    remote_dir = "https://s3.amazonaws.com/biodata/variants/"
    files = {"mm10": ["mm10-dbSNP-2013-09-12.vcf"]}
    for f in files[gid]:
        for ext in ["", ".idx"]:
            fname = f + ext
            if not env.safe_exists(fname):
                url = "%s%s.gz" % (remote_dir, fname)
                env.safe_run("wget -O %s -c %s" % (os.path.basename(url), url))
                env.safe_run("gunzip %s" % os.path.basename(url))
コード例 #24
0
def _dbsnp_mouse(env, gid):
    """Retrieve resources for mouse variant analysis from custom S3 biodata bucket.
    """
    remote_dir = "https://s3.amazonaws.com/biodata/variants/"
    files = {"mm10": ["mm10-dbSNP-2013-09-12.vcf"]}
    for f in files[gid]:
        for ext in ["", ".idx"]:
            fname = f + ext
            if not env.safe_exists(fname):
                out_file = shared._remote_fetch(
                    env, "%s%s.gz" % (remote_dir, fname))
                env.safe_run("gunzip %s" % out_file)
コード例 #25
0
def _determine_distribution(env):
    """Guess the distribution label of the target machine.

    The lower-cased output of ``cat /etc/*release`` is scanned for known
    markers in a fixed order; Debian and Mac OS X are recognised through
    marker files instead.

    :param env: environment object providing ``safe_run_output`` and
        ``safe_exists``
    :returns: one of "ubuntu", "centos", "suse", "scientificlinux",
        "debian", "arch" or "macosx"
    :raises Exception: when no known distribution is detected; the message
        includes the release text that was inspected
    """
    with quiet():
        release_text = env.safe_run_output("cat /etc/*release").lower()
    # Markers checked before the Debian file test; first hit decides.
    early_markers = (
        ("id=ubuntu", "ubuntu"),
        ("centos release", "centos"),
        ("centos linux release", "centos"),
        ("red hat enterprise linux", "centos"),
        ("fedora", "centos"),
        # Amazon AMIs are Red-Hat based
        ("amzn", "centos"),
        ("amazon", "centos"),
        ("suse linux", "suse"),
        ("opensuse", "suse"),
        ("scientific linux", "scientificlinux"),
    )
    for marker, label in early_markers:
        if marker in release_text:
            return label
    if env.safe_exists("/etc/debian_version"):
        return "debian"
    # Arch and derivatives are only considered after Debian is ruled out.
    for marker in ("id=arch", 'id_like="arch"', "antergos"):
        if marker in release_text:
            return "arch"
    # File used by Python's platform.mac_ver
    if env.safe_exists("/System/Library/CoreServices/SystemVersion.plist"):
        return "macosx"
    raise Exception(
        "Attempt to automatically determine Linux distribution of target machine failed:\n%s"
        % release_text)
コード例 #26
0
def _make_install_script(out_file, config):
    """Write an R script that installs the configured CRAN/Bioconductor packages.

    ``out_file`` is recreated empty and then filled piece by piece from
    ``config`` ("cranrepo", "biocrepo", "cran", optional "bioc",
    "cran-after-bioc" and "update_packages"). The R site-library directory is
    created and a change of ownership to the current user is attempted.
    """
    def _r_string_vector(pkgs):
        # Render package names as the body of an R character vector.
        return ", ".join('"%s"' % pkg for pkg in pkgs)

    # Start from an empty script file.
    if env.safe_exists(out_file):
        env.safe_run("rm -f %s" % out_file)
    env.safe_run("touch %s" % out_file)
    site_library = os.path.join(env.system_install, "lib", "R", "site-library")
    env.safe_sudo("mkdir -p %s" % site_library)
    # A failing chown is tolerated (warn_only).
    with settings(warn_only=True):
        env.safe_sudo("chown -R %s %s" % (env.user, site_library))
    # Library location and repositories.
    env.safe_append(out_file, """
    .libPaths(c("%s"))
    library(methods)
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (site_library, config["cranrepo"], config["biocrepo"]))
    # Installer factory, optionally preceded by an update of installed packages.
    if config.get("update_packages", True):
        updater = """
        update.packages(lib.loc="%s", repos=repos, ask=FALSE)
        """ % site_library
    else:
        updater = "\n"
    installer_template = """
    repo.installer <- function(repos, install.fn) {
      %s
      maybe.install <- function(pname) {
        if (!(pname %%in%% installed.packages()))
          install.fn(pname)
      }
    }
    """
    env.safe_append(out_file, installer_template % updater)
    # CRAN packages.
    env.safe_append(out_file, """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages)
    lapply(std.pkgs, std.installer)
    """ % (_r_string_vector(config['cran'])))
    # Bioconductor packages, when any are requested.
    if len(config.get("bioc", [])) > 0:
        env.safe_append(out_file, """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite)
        lapply(bioc.pkgs, bioc.installer)
        """ % (_r_string_vector(config['bioc'])))
    # CRAN packages that have to be installed after the Bioconductor ones.
    if config.get("cran-after-bioc"):
        env.safe_append(out_file, """
        std2.pkgs <- c(%s)
        lapply(std2.pkgs, std.installer)
        """ % (_r_string_vector(config['cran-after-bioc'])))
コード例 #27
0
def download_dbnsfp(genomes):
    """Back compatible download target for dbNSFP, to be moved to GGD recipes.

    For every (organism, genome id, manager) entry whose genome id is hg19 or
    GRCh37 and whose manager config enables "dbnsfp", the variation directory
    is created if needed and the dbNSFP download runs inside it.
    """
    folder_name = "variation"
    genome_dir = os.path.join(env.data_files, "genomes")
    human_builds = set(["hg19", "GRCh37"])
    for orgname, gid, manager in genomes:
        if gid not in human_builds or not manager.config.get("dbnsfp"):
            continue
        vrn_dir = os.path.join(genome_dir, orgname, gid, folder_name)
        if not env.safe_exists(vrn_dir):
            env.safe_run('mkdir -p %s' % vrn_dir)
        with cd(vrn_dir):
            _download_dbnsfp(env, gid, manager.config)
コード例 #28
0
def local_append(filename, text, use_sudo=False, partial=False, escape=True, shell=False):
    """Append lines to ``filename`` with ``echo``, skipping lines already present.

    :param filename: file to append to
    :param text: a single string or an iterable of lines
    :param use_sudo: run through ``env.safe_sudo`` instead of ``env.safe_run``
    :param partial: when true, an existing line only has to start with the
        new line to count as present; otherwise the whole line must match
    :param escape: rewrite single quotes in the line before it is echoed
    :param shell: passed through to ``env.safe_contains``
    """
    runner = env.safe_sudo if use_sudo else env.safe_run
    # Normalize non-list input to be a list
    lines = [text] if isinstance(text, basestring) else text
    line_end = '' if partial else '$'
    for line in lines:
        pattern = '^' + _escape_for_regex(line) + line_end
        already_present = (env.safe_exists(filename, use_sudo=use_sudo)
                           and line
                           and env.safe_contains(filename, pattern,
                                                 use_sudo=use_sudo,
                                                 escape=False, shell=shell))
        if already_present:
            continue
        if escape:
            # Keep embedded single quotes from ending the quoted echo argument.
            line = line.replace("'", r"'\\''")
        runner("echo '%s' >> %s" % (line, _expand_path(filename)))
コード例 #29
0
ファイル: fabutils.py プロジェクト: adaaouak/cloudbiolinux
def local_append(filename, text, use_sudo=False, partial=False, escape=True, shell=False):
    """Append lines to ``filename`` with ``echo``, skipping lines already present.

    :param filename: file to append to
    :param text: a single string or an iterable of lines
    :param use_sudo: run through ``env.safe_sudo`` instead of ``env.safe_run``
    :param partial: when true, an existing line only has to start with the
        new line to count as present; otherwise the whole line must match
    :param escape: rewrite single quotes in the line before it is echoed
    :param shell: passed through to ``env.safe_contains``
    """
    runner = env.safe_sudo if use_sudo else env.safe_run
    # Normalize non-list input to be a list
    lines = [text] if isinstance(text, basestring) else text
    line_end = '' if partial else '$'
    for line in lines:
        pattern = '^' + _escape_for_regex(line) + line_end
        already_present = (env.safe_exists(filename, use_sudo=use_sudo)
                           and line
                           and env.safe_contains(filename, pattern,
                                                 use_sudo=use_sudo,
                                                 escape=False, shell=shell))
        if already_present:
            continue
        if escape:
            # Keep embedded single quotes from ending the quoted echo argument.
            line = line.replace("'", r"'\\''")
        runner("echo '%s' >> %s" % (line, _expand_path(filename)))
コード例 #30
0
ファイル: dbsnp.py プロジェクト: Fredus14/cloudbiolinux
def download_dbnsfp(genomes):
    """Back compatible download target for dbNSFP, to be moved to GGD recipes.

    ``genomes`` is an iterable of ``(orgname, gid, manager)`` tuples. Only
    hg19 and GRCh37 entries whose ``manager.config`` has a truthy "dbnsfp"
    value are processed. For each, the ``variation`` directory under
    ``<env.data_files>/genomes/<orgname>/<gid>`` is created when missing and
    ``_download_dbnsfp`` is run from inside it.
    """
    supported_builds = frozenset(["hg19", "GRCh37"])
    genomes_root = os.path.join(env.data_files, "genomes")
    for orgname, gid, manager in genomes:
        if gid not in supported_builds or not manager.config.get("dbnsfp"):
            continue
        target_dir = os.path.join(genomes_root, orgname, gid, "variation")
        if not env.safe_exists(target_dir):
            env.safe_run('mkdir -p %s' % target_dir)
        with cd(target_dir):
            _download_dbnsfp(env, gid, manager.config)
コード例 #31
0
ファイル: dbsnp.py プロジェクト: remiolsen/cloudbiolinux
def _ensembl_vcf(env, gid, manager):
    """Fetch ensemble vcf file (available from release 71) and do tabix indexing
    """
    fname = "%s.vcf.gz" % (manager._organism)
    download_url = manager._base_url
    section = "variation/"
    if not manager._section is "standard":
        section = ""
        fname = fname.lower()
    download_url += "release-%s/%svcf/%s/%s" % (manager._release_number, 
                    section, manager._organism.lower(), fname)
    if not env.safe_exists(fname):
        shared._remote_fetch(env, download_url)
        env.safe_run("tabix -f -p vcf %s" % fname)
コード例 #32
0
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Fetch one VCF from the Broad resource bundle into the current directory.

    The remote file is ``<name>.<gid>.vcf<ext>.gz`` under
    ``<bundle_version>/<gid>/`` on the Broad FTP server. The local file drops
    the genome id and any ".sites" from that name. If the local file is
    missing it is downloaded, gunzipped and renamed; a failed download only
    logs a warning.

    Returns the local file name, whether or not the file was obtained.
    """
    remote_name = "{name}.{gid}.vcf{ext}".format(gid=gid, name=name, ext=ext)
    local_name = remote_name.replace(".{0}".format(gid), "").replace(".sites", "")
    url = "ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/" + \
          "{bundle}/{gid}/{fname}.gz".format(
              bundle=bundle_version, fname=remote_name, gid=gid)
    if env.safe_exists(local_name):
        return local_name
    # allow_fail: a missing remote resource is reported, not fatal.
    fetched = shared._remote_fetch(env, url, allow_fail=True)
    if not fetched:
        env.logger.warn("dbSNP resources not available for %s" % gid)
    else:
        env.safe_run("gunzip %s" % fetched)
        env.safe_run("mv %s %s" % (remote_name, local_name))
    return local_name
コード例 #33
0
ファイル: dbsnp.py プロジェクト: glebkuznetsov/cloudbiolinux
def _download_lcrs_custom(env, gid):
    """Retrieve low complexity regions from other sources.

    mm10 from Brent Pedersen: http://figshare.com/articles/LCR_mm10_bed_gz/1180124
    """
    urls = {"mm10": "http://files.figshare.com/1688228/LCR_mm10.bed.gz"}
    out_file = "LCR.bed.gz"
    cur_url = urls.get(gid)
    if cur_url and not env.safe_exists(out_file):
        def _bgzip_file(env, orig_file):
            env.safe_run("zcat %s | bgzip -c > %s" % (orig_file, out_file))
            return out_file
        shared._remote_fetch(env, cur_url, fix_fn=_bgzip_file)
        env.safe_run("tabix -p vcf -f %s" % out_file)
コード例 #34
0
ファイル: dbsnp.py プロジェクト: glebkuznetsov/cloudbiolinux
def _download_qsignature(env, gid, gconfig):
    """Download qsignature position file to detect samples problems

    :param env
    :param gid: str genome id
    :param gconfig: 

    :returns: NULL
    """
    base_url = "http://downloads.sourceforge.net/project/adamajava/qsignature.tar.bz2"
    outfile = "qsignature.vcf"
    if gid == "GRCh37" or (gid == "hg19" and not env.safe_exists("../../GRCh37")):
        if not env.safe_exists(outfile):
            zipfile = shared._remote_fetch(env, base_url, samedir=True)
            outdir = "qsignature"
            env.safe_run("mkdir -p %s" % outdir)
            env.safe_run("tar -jxf %s -C %s" % (zipfile, outdir))
            env.safe_run("mv %s/qsignature_positions.txt %s" % (outdir, outfile))
            env.safe_run("rm -rf %s" % outdir)
            env.safe_run("rm -rf %s" % zipfile)
    elif gid == "hg19":  # symlink to GRCh37 download
        if not env.safe_exists(outfile):
            env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (outfile, outfile))
コード例 #35
0
ファイル: dbsnp.py プロジェクト: rmlawton/cloudbiolinux
def download_dbsnp(genomes, bundle_version, dbsnp_version):
    """Download and install dbSNP variation data for supplied genomes.

    ``genomes`` is an iterable of ``(orgname, gid, manager)`` tuples; entries
    whose ``manager.config`` lacks a truthy "dbsnp" value are skipped. The
    ``variation`` directory for each remaining genome is created when missing
    and used as the working directory. GRCh37/hg19 go through
    ``_dbsnp_human``, mm10/canFam3 through ``_dbsnp_custom``; other builds
    only get the directory.
    """
    genomes_root = os.path.join(env.data_files, "genomes")
    for orgname, gid, manager in genomes:
        if not manager.config.get("dbsnp", False):
            continue
        target_dir = os.path.join(genomes_root, orgname, gid, "variation")
        if not env.safe_exists(target_dir):
            env.safe_run("mkdir -p %s" % target_dir)
        with cd(target_dir):
            if gid in ("GRCh37", "hg19"):
                _dbsnp_human(env, gid, manager, bundle_version, dbsnp_version)
            elif gid in ("mm10", "canFam3"):
                _dbsnp_custom(env, gid)
コード例 #36
0
def _download_lcrs_custom(env, gid):
    """Retrieve low complexity regions from other sources.

    mm10 from Brent Pedersen: http://figshare.com/articles/LCR_mm10_bed_gz/1180124
    """
    urls = {"mm10": "http://files.figshare.com/1688228/LCR_mm10.bed.gz"}
    out_file = "LCR.bed.gz"
    cur_url = urls.get(gid)
    if cur_url and not env.safe_exists(out_file):
        def _bgzip_file(env, orig_file):
            env.safe_run("zcat %s | bgzip -c > %s" % (orig_file, out_file))
            return out_file
        shared._remote_fetch(env, cur_url, fix_fn=_bgzip_file)
        env.safe_run("tabix -p vcf -f %s" % out_file)
コード例 #37
0
def _make_install_script(out_file, config):
    """Write an R script to ``out_file`` that installs the configured packages.

    Any existing ``out_file`` is removed and the script is rebuilt by
    appending R code with ``env.safe_append``. The R site-library directory
    under ``env.system_install`` is created with sudo.

    ``config`` keys read: "cranrepo", "biocrepo" and "cran" (required), plus
    the optional "bioc", "cran-after-bioc" and "update_packages" (defaults
    to True).
    """
    def _as_r_strings(pkgs):
        # Render names as the double-quoted, comma separated body of an R c().
        return ", ".join('"%s"' % p for p in pkgs)

    # Start from an empty script file.
    if env.safe_exists(out_file):
        env.safe_run("rm -f %s" % out_file)
    env.safe_run("touch %s" % out_file)
    site_lib = os.path.join(env.system_install, "lib", "R", "site-library")
    env.safe_sudo("mkdir -p %s" % site_lib)
    header = """
    .libPaths(c("%s"))
    library(methods)
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (site_lib, config["cranrepo"], config["biocrepo"])
    env.safe_append(out_file, header)
    # Statement placed at the top of the R repo.installer function body.
    if not config.get("update_packages", True):
        update_str = "\n"
    else:
        update_str = """
        update.packages(lib.loc="%s", repos=repos, ask=FALSE)
        """ % site_lib
    # %%in%% is doubled because this template is %-formatted below.
    installer_template = """
    repo.installer <- function(repos, install.fn) {
      %s
      maybe.install <- function(pname) {
        if (!(pname %%in%% installed.packages()))
          install.fn(pname)
      }
    }
    """
    env.safe_append(out_file, installer_template % update_str)
    cran_section = """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages)
    lapply(std.pkgs, std.installer)
    """ % (_as_r_strings(config['cran']))
    env.safe_append(out_file, cran_section)
    if len(config.get("bioc", [])) > 0:
        bioc_section = """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite)
        lapply(bioc.pkgs, bioc.installer)
        """ % (_as_r_strings(config['bioc']))
        env.safe_append(out_file, bioc_section)
    if config.get("cran-after-bioc"):
        # CRAN packages listed for installation after the Bioconductor ones.
        late_cran_section = """
        std2.pkgs <- c(%s)
        lapply(std2.pkgs, std.installer)
        """ % (_as_r_strings(config['cran-after-bioc']))
        env.safe_append(out_file, late_cran_section)
コード例 #38
0
ファイル: dbsnp.py プロジェクト: caddymob/cloudbiolinux
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Download a VCF resource from the Broad bundle FTP site.

    Builds the Broad file name ``<name>.<gid>.vcf<ext>`` and a local name
    with the genome id and ".sites" removed. When the local file does not
    exist, the gzipped remote file is fetched, uncompressed and renamed to
    the local name. A failed fetch is logged as a warning.

    Returns the local file name in every case.
    """
    broad_name = "{name}.{gid}.vcf{ext}".format(gid=gid, name=name, ext=ext)
    gid_suffix = ".{0}".format(gid)
    target = broad_name.replace(gid_suffix, "").replace(".sites", "")
    remote_path = "{bundle}/{gid}/{fname}.gz".format(
        bundle=bundle_version, fname=broad_name, gid=gid)
    source_url = "ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/" + remote_path
    if not env.safe_exists(target):
        downloaded = shared._remote_fetch(env, source_url, allow_fail=True)
        if downloaded:
            # gunzip leaves the Broad-named file behind; rename it.
            env.safe_run("gunzip %s" % downloaded)
            env.safe_run("mv %s %s" % (broad_name, target))
        else:
            env.logger.warn("dbSNP resources not available for %s" % gid)
    return target
コード例 #39
0
ファイル: dbsnp.py プロジェクト: Altoros/cloudbiolinux
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Prepare a bgzipped, tabix indexed Broad bundle VCF in the current directory.

    The local file is ``<name>.vcf<ext>.gz`` (genome id and ".sites" removed
    from the Broad name). An existing uncompressed copy is bgzipped and
    indexed in place. Otherwise the file is downloaded, recompressed with
    bgzip and indexed; a failed download only logs a warning. Leftover
    ``.vcf`` and ``.vcf.idx`` files are removed at the end.

    Returns the local ``.gz`` file name.
    """
    remote_name = "{name}.{gid}.vcf{ext}".format(gid=gid, name=name, ext=ext)
    fname = remote_name.replace(".{0}".format(gid), "").replace(".sites", "") + ".gz"
    plain_vcf = fname.replace(".vcf.gz", ".vcf")
    url = "ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/" + \
          "{bundle}/{gid}/{fname}.gz".format(
              bundle=bundle_version, fname=remote_name, gid=gid)
    # An uncompressed copy exists: compress and index it.
    if env.safe_exists(plain_vcf):
        env.safe_run("bgzip %s" % plain_vcf)
        env.safe_run("tabix -f -p vcf %s" % fname)
    # Still nothing usable: download, convert gzip to bgzip, index.
    if not env.safe_exists(fname):
        fetched = shared._remote_fetch(env, url, allow_fail=True)
        if not fetched:
            env.logger.warn("dbSNP resources not available for %s" % gid)
        else:
            env.safe_run("gunzip -c %s | bgzip -c > %s" % (fetched, fname))
            env.safe_run("tabix -f -p vcf %s" % fname)
            env.safe_run("rm -f %s" % fetched)
    # Remove leftover uncompressed files and their indexes.
    for suffix in (".vcf", ".vcf.idx"):
        stale = fname.replace(".vcf.gz", suffix)
        if env.safe_exists(stale):
            env.safe_run("rm -f %s" % (stale))
    return fname
コード例 #40
0
ファイル: dbsnp.py プロジェクト: remiolsen/cloudbiolinux
def _download_sv_repeats(gid):
    """Retrieve telomere and centromere exclusion regions for structural variant calling.
    From Delly: https://github.com/tobiasrausch/delly

    Writes ``sv_repeat_telomere_centromere.bed`` in the current directory
    unless it already exists. For hg19 only lines starting with "chr" are
    kept; for GRCh37 those lines are dropped. Any other ``gid`` fails an
    assertion.
    """
    mere_url = "https://raw.githubusercontent.com/chapmanb/delly/master/human.hg19.excl.tsv"
    out_file = "sv_repeat_telomere_centromere.bed"
    if env.safe_exists(out_file):
        return

    def _select_by_gid(env, orig_file):
        # Choose the grep filter that matches the build's chromosome naming.
        if gid == "hg19":
            grep_cmd = "grep ^chr %s > %s"
        else:
            assert gid == "GRCh37"
            grep_cmd = "grep -v ^chr %s > %s"
        env.safe_run(grep_cmd % (orig_file, out_file))
        return out_file

    shared._remote_fetch(env, mere_url, fix_fn=_select_by_gid)
コード例 #41
0
ファイル: dbsnp.py プロジェクト: abdo3a/cloudbiolinux
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Prepare a bgzipped, tabix indexed Broad bundle VCF in the current directory.

    An existing uncompressed copy is bgzipped and indexed. Otherwise the
    file is downloaded from the Broad FTP bundle, recompressed with bgzip
    and indexed. Leftover ``.vcf`` and ``.vcf.idx`` files are removed at the
    end.

    Returns the local ``.gz`` file name.
    """
    # Broad bundle directories have uneven use of ".sites" in VCF files
    # only present in hg19 for non-dbSNP resources
    if gid == "hg19" and not name.startswith("dbsnp"):
        sites = ".sites"
    else:
        sites = ""
    remote_name = "{name}.{gid}{sites}.vcf{ext}".format(gid=gid, name=name, sites=sites, ext=ext)
    fname = remote_name.replace(".{0}".format(gid), "").replace(".sites", "") + ".gz"
    plain_vcf = fname.replace(".vcf.gz", ".vcf")
    url = "ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/" + \
          "{bundle}/{gid}/{fname}.gz".format(
              bundle=bundle_version, fname=remote_name, gid=gid)
    # An uncompressed copy exists: compress and index it.
    if env.safe_exists(plain_vcf):
        env.safe_run("bgzip %s" % plain_vcf)
        env.safe_run("tabix -f -p vcf %s" % fname)
    # Otherwise download, convert gzip to bgzip and index.
    if not env.safe_exists(fname):
        fetched = shared._remote_fetch(env, url)
        env.safe_run("gunzip -c %s | bgzip -c > %s" % (fetched, fname))
        env.safe_run("tabix -f -p vcf %s" % fname)
        env.safe_run("rm -f %s" % fetched)
    # Remove leftover uncompressed files and their indexes.
    for suffix in (".vcf", ".vcf.idx"):
        stale = fname.replace(".vcf.gz", suffix)
        if env.safe_exists(stale):
            env.safe_run("rm -f %s" % (stale))
    return fname
コード例 #42
0
ファイル: dbsnp.py プロジェクト: ashwinkalbhor/cloudbiolinux
def download_dbsnp(genomes, bundle_version, dbsnp_version):
    """Download and install dbSNP variation data for supplied genomes.

    Each ``(orgname, gid, manager)`` entry of ``genomes`` with a truthy
    "dbsnp" value in ``manager.config`` gets a ``variation`` directory under
    ``<env.data_files>/genomes/<orgname>/<gid>``, created when missing.
    Inside that directory ``_dbsnp_human`` handles GRCh37 and hg19, and
    ``_dbsnp_custom`` handles mm10 and canFam3.
    """
    subdir = "variation"
    base_dir = os.path.join(env.data_files, "genomes")
    human_builds = ("GRCh37", "hg19")
    custom_builds = ("mm10", "canFam3")
    for orgname, gid, manager in genomes:
        if not manager.config.get("dbsnp", False):
            continue
        work_dir = os.path.join(base_dir, orgname, gid, subdir)
        if not env.safe_exists(work_dir):
            env.safe_run('mkdir -p %s' % work_dir)
        with cd(work_dir):
            if gid in human_builds:
                _dbsnp_human(env, gid, manager, bundle_version, dbsnp_version)
            elif gid in custom_builds:
                _dbsnp_custom(env, gid)
コード例 #43
0
ファイル: dbsnp.py プロジェクト: JCVI-Cloud/cloudbiolinux
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Download a VCF resource from the Broad bundle FTP site with wget.

    The local file name is the Broad name ``<name>.<gid>.vcf<ext>`` with the
    genome id and ".sites" removed. When that file is missing, the gzipped
    remote file is fetched with ``wget -c`` (failure tolerated), gunzipped
    and renamed. A failed download is logged as a warning.

    Returns the local file name in every case.
    """
    remote_name = "{name}.{gid}.vcf{ext}".format(gid=gid, name=name, ext=ext)
    local_name = remote_name.replace(".{0}".format(gid), "").replace(".sites", "")
    url = "ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/" + \
          "{bundle}/{gid}/{fname}.gz".format(
              bundle=bundle_version, fname=remote_name, gid=gid)
    if env.safe_exists(local_name):
        return local_name
    # warn_only: a missing remote resource must not abort the whole run.
    with warn_only():
        result = env.safe_run("wget -c %s" % url)
    if not result.succeeded:
        env.logger.warn("dbSNP resources not available for %s" % gid)
    else:
        env.safe_run("gunzip %s" % os.path.basename(url))
        env.safe_run("mv %s %s" % (remote_name, local_name))
    return local_name
コード例 #44
0
ファイル: dbsnp.py プロジェクト: Altoros/cloudbiolinux
def _download_sv_repeats(gid):
    """Retrieve telomere and centromere exclusion regions for structural variant calling.
    From Delly: https://github.com/tobiasrausch/delly

    The downloaded table is filtered into
    ``sv_repeat_telomere_centromere.bed``: hg19 keeps the lines beginning
    with "chr", GRCh37 keeps all other lines. Any other ``gid`` fails an
    assertion. Nothing is done when the output file already exists.
    """
    source_url = "https://raw.githubusercontent.com/chapmanb/delly/master/human.hg19.excl.tsv"
    bed_file = "sv_repeat_telomere_centromere.bed"

    def _select_by_gid(env, orig_file):
        # Filter by chromosome naming style for the requested build.
        if gid != "hg19":
            assert gid == "GRCh37"
            env.safe_run("grep -v ^chr %s > %s" % (orig_file, bed_file))
        else:
            env.safe_run("grep ^chr %s > %s" % (orig_file, bed_file))
        return bed_file

    if not env.safe_exists(bed_file):
        shared._remote_fetch(env, source_url, fix_fn=_select_by_gid)
コード例 #45
0
def r_library_installer(config):
    """Install R libraries using CRAN and Bioconductor.

    Builds a temporary ``install_packages.R`` in the current directory, runs
    it with ``Rscript`` under sudo and removes it afterwards.

    ``config`` keys read: "cranrepo", "biocrepo" and "cran" (required), plus
    the optional "bioc" and "update_packages" (defaults to True).
    """
    def _as_r_strings(pkgs):
        # Render names as the double-quoted, comma separated body of an R c().
        return ", ".join('"%s"' % p for p in pkgs)

    # Create an Rscript file with install details, starting from empty.
    script = "install_packages.R"
    if env.safe_exists(script):
        env.safe_run("rm -f %s" % script)
    env.safe_run("touch %s" % script)
    header = """
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (config["cranrepo"], config["biocrepo"])
    env.safe_append(script, header)
    # Appended as is (no %-formatting), so %in% stays unescaped.
    installer_def = """
    repo.installer <- function(repos, install.fn) {
      update.or.install <- function(pname) {
        if (pname %in% installed.packages())
          update.packages(lib.loc=c(pname), repos=repos, ask=FALSE)
        else
          install.fn(pname)
      }
    }
    """
    env.safe_append(script, installer_def)
    cran_section = """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages)
    lapply(std.pkgs, std.installer)
    """ % (_as_r_strings(config['cran']))
    env.safe_append(script, cran_section)
    if len(config.get("bioc", [])) > 0:
        bioc_section = """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite)
        lapply(bioc.pkgs, bioc.installer)
        """ % (_as_r_strings(config['bioc']))
        env.safe_append(script, bioc_section)
    if config.get("update_packages", True):
        update_section = """
        update.packages(repos=biocinstallRepos(), ask=FALSE)
        update.packages(ask=FALSE)
        """
        env.safe_append(script, update_section)
    # run the script and then get rid of it
    env.safe_sudo("Rscript %s" % script)
    env.safe_run("rm -f %s" % script)
コード例 #46
0
ファイル: dbsnp.py プロジェクト: Altoros/cloudbiolinux
def _download_lcrs(gid):
    """Retrieve low complexity regions from Heng Li's variant analysis paper.

    The hs37d5 BED file is downloaded, adjusted for ``gid``, bgzip
    compressed to ``LCR.bed.gz`` and tabix indexed. For hg19, lines starting
    with GL, NC or hs are dropped and "chr" is prefixed to every remaining
    line; GRCh37 is used unchanged. Any other ``gid`` fails an assertion.
    Nothing is done when ``LCR.bed.gz`` already exists.
    """
    lcr_url = "https://github.com/lh3/varcmp/raw/master/scripts/LCR-hs37d5.bed.gz"
    out_file = "LCR.bed.gz"
    if not env.safe_exists(out_file):
        def _fix_chrom_names(env, orig_file):
            if gid == "hg19":
                # Drop non-chromosome contigs and switch to "chr" naming.
                convert_cmd = "| grep -v ^GL | grep -v ^NC | grep -v ^hs | sed 's/^/chr/'"
            else:
                assert gid == "GRCh37"
                convert_cmd = ""
            env.safe_run("zcat %s %s | bgzip -c > %s" % (orig_file, convert_cmd, out_file))
            return out_file
        shared._remote_fetch(env, lcr_url, fix_fn=_fix_chrom_names)
        # The file is BED, so index it with the BED preset; the VCF preset
        # would mis-parse the interval columns.
        env.safe_run("tabix -p bed -f %s" % out_file)
コード例 #47
0
ファイル: dbsnp.py プロジェクト: remiolsen/cloudbiolinux
def _download_lcrs(gid):
    """Retrieve low complexity regions from Heng Li's variant analysis paper.

    Downloads the hs37d5 LCR BED file and writes a bgzipped, tabix indexed
    ``LCR.bed.gz`` in the current directory, unless that file already
    exists. hg19 output excludes lines starting with GL, NC or hs and gets a
    "chr" prefix on each line; GRCh37 output is unmodified. Any other
    ``gid`` fails an assertion.
    """
    lcr_url = "https://github.com/lh3/varcmp/raw/master/scripts/LCR-hs37d5.bed.gz"
    out_file = "LCR.bed.gz"
    if not env.safe_exists(out_file):
        def _fix_chrom_names(env, orig_file):
            if gid == "hg19":
                # Remove non-chromosome contigs, then add the "chr" prefix.
                convert_cmd = "| grep -v ^GL | grep -v ^NC | grep -v ^hs | sed 's/^/chr/'"
            else:
                assert gid == "GRCh37"
                convert_cmd = ""
            env.safe_run("zcat %s %s | bgzip -c > %s" % (orig_file, convert_cmd, out_file))
            return out_file
        shared._remote_fetch(env, lcr_url, fix_fn=_fix_chrom_names)
        # Use the BED preset: the output is a BED file, and the VCF preset
        # would mis-parse its interval columns.
        env.safe_run("tabix -p bed -f %s" % out_file)
コード例 #48
0
ファイル: __init__.py プロジェクト: vallurumk/cloudbiolinux
def _connect_native_packages(env, pkg_install, lib_install):
    """Connect native installed packages to local versions.

    This helps setup a non-sudo environment to handle software
    that needs a local version in our non-root directory tree.

    Adds a marker comment to ``env.shell_config`` when it is missing, adds
    the PATH export when the local ``bin`` directory is not in the current
    PATH and the shell config exists without it, and creates a local
    virtualenv when "python" is in both ``pkg_install`` and ``lib_install``.
    """
    local_bin = os.path.join(env.system_install, "bin")
    exports = _get_shell_exports(env)
    current_path = env.safe_run_output("echo $PATH")
    marker = "# CloudBioLinux PATH updates"
    if not env.safe_contains(env.shell_config, marker):
        env.safe_append(env.shell_config, "\n" + marker)
    needs_path_export = (
        local_bin not in current_path
        and env.safe_exists(env.shell_config)
        and not env.safe_contains(env.shell_config, exports["path"]))
    if needs_path_export:
        env.safe_append(env.shell_config, exports["path"])
    wants_python = "python" in pkg_install and "python" in lib_install
    if wants_python:
        _create_local_virtualenv(env.system_install)
コード例 #49
0
def _make_install_script(out_file, config):
    """Write an R script to ``out_file`` that installs the configured packages.

    Any existing ``out_file`` is removed and the script is rebuilt by
    appending R code with ``env.safe_append``. The R site-library directory
    under ``env.system_install`` is created with sudo and chown'ed to
    ``env.user`` (chown failures are tolerated).

    ``config`` keys read: "cranrepo", "biocrepo" and "cran" (required), plus
    the optional "bioc", "cran-after-bioc", "github" and "update_packages"
    (defaults to True).
    """
    def _as_r_strings(pkgs):
        # Render names as the double-quoted, comma separated body of an R c().
        return ", ".join('"%s"' % p for p in pkgs)

    # Start from an empty script file.
    if env.safe_exists(out_file):
        env.safe_run("rm -f %s" % out_file)
    env.safe_run("touch %s" % out_file)
    site_lib = os.path.join(env.system_install, "lib", "R", "site-library")
    env.safe_sudo("mkdir -p %s" % site_lib)
    with settings(warn_only=True):
        env.safe_sudo("chown -R %s %s" % (env.user, site_lib))
    header = """
    .libPaths(c("%s"))
    library(methods)
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (site_lib, config["cranrepo"], config["biocrepo"])
    env.safe_append(out_file, header)
    # Statement placed at the top of the R repo.installer function body.
    if not config.get("update_packages", True):
        update_str = "\n"
    else:
        update_str = """
        update.packages(lib.loc="%s", repos=repos, ask=FALSE)
        """ % site_lib
    installer_template = """
    repo.installer <- function(repos, install.fn, pkg_name_fn) {
      %s
      maybe.install <- function(pname) {
        check_name <- ifelse(is.null(pkg_name_fn), pname, pkg_name_fn(pname))
        if (!(is.element(check_name, installed.packages()[,1])))
          install.fn(pname)
      }
    }
    """
    env.safe_append(out_file, installer_template % update_str)
    cran_section = """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages, NULL)
    lapply(std.pkgs, std.installer)
    """ % (_as_r_strings(config['cran']))
    env.safe_append(out_file, cran_section)
    if len(config.get("bioc", [])) > 0:
        bioc_section = """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite, NULL)
        lapply(bioc.pkgs, bioc.installer)
        """ % (_as_r_strings(config['bioc']))
        env.safe_append(out_file, bioc_section)
    if config.get("cran-after-bioc"):
        # CRAN packages listed for installation after the Bioconductor ones.
        late_cran_section = """
        std2.pkgs <- c(%s)
        lapply(std2.pkgs, std.installer)
        """ % (_as_r_strings(config['cran-after-bioc']))
        env.safe_append(out_file, late_cran_section)
    if config.get("github"):
        github_section = """
        library(devtools)
        github.pkgs <- c(%s)
        get_pkg_name <- function(orig) {
          unlist(strsplit(unlist(strsplit(orig, "/"))[2], "@"))[1]
        }
        github_installer = repo.installer(NULL, install_github, get_pkg_name)
        lapply(github.pkgs, github_installer)
        """ % (_as_r_strings(config['github']))
        env.safe_append(out_file, github_section)
コード例 #50
0
ファイル: dbsnp.py プロジェクト: JCVI-Cloud/cloudbiolinux
def _download_cosmic(gid):
    base_url = "http://www.broadinstitute.org/cancer/cga/sites/default/files/data/tools/mutect/"
    base_name = "b37_cosmic_v54_120711.vcf"
    if gid in ["GRCh37"] and not env.safe_exists(base_name):
        env.safe_run("wget -c {0}/{1}".format(base_url, base_name))
コード例 #51
0
ファイル: libraries.py プロジェクト: Fredus14/cloudbiolinux
def _make_install_script(out_file, config):
    """Write an R script to ``out_file`` that installs the configured packages.

    Any existing ``out_file`` is removed and the script is rebuilt by
    appending R code with ``env.safe_append``. The R site-library directory
    under ``env.system_install`` is created with sudo and chown'ed to
    ``env.user`` (chown failures are tolerated).

    ``config`` keys read: "cranrepo" (required), plus the optional
    "biocrepo", "cran", "bioc", "cran-after-bioc", "github" and
    "update_packages" (defaults to True).
    """
    def _as_r_strings(pkgs):
        # Render names as the double-quoted, comma separated body of an R c().
        return ", ".join('"%s"' % p for p in pkgs)

    # Start from an empty script file.
    if env.safe_exists(out_file):
        env.safe_run("rm -f %s" % out_file)
    env.safe_run("touch %s" % out_file)
    site_lib = os.path.join(env.system_install, "lib", "R", "site-library")
    env.safe_sudo("mkdir -p %s" % site_lib)
    with settings(warn_only=True):
        env.safe_sudo("chown -R %s %s" % (env.user, site_lib))
    header = """
    .libPaths(c("%s"))
    library(methods)
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    """ % (site_lib, config["cranrepo"])
    if config.get("biocrepo"):
        # The Bioconductor bootstrap script is sourced only when configured.
        header += """\nsource("%s")\n""" % config["biocrepo"]
    env.safe_append(out_file, header)
    # Statement placed at the top of the R repo.installer function body.
    if not config.get("update_packages", True):
        update_str = "\n"
    else:
        update_str = """
        update.packages(lib.loc="%s", repos=repos, ask=FALSE)
        """ % site_lib
    installer_template = """
    repo.installer <- function(repos, install.fn, pkg_name_fn) {
      %s
      maybe.install <- function(pname) {
        if (!is.null(pkg_name_fn)) {
           pinfo <- pkg_name_fn(pname)
           ipkgs <- installed.packages()[,3][pinfo["pkg"]]
           if (is.na(ipkgs[pinfo["pkg"]]) || pinfo["version"] != ipkgs[pinfo["pkg"]])
             try(install.fn(pinfo["pname"]))
        }
        else if (!(is.element(pname, installed.packages()[,1])))
           install.fn(pname)
      }

    }
    """
    env.safe_append(out_file, installer_template % update_str)
    if len(config.get("cran") or []) > 0:
        cran_section = """
        std.pkgs <- c(%s)
        std.installer = repo.installer(cran.repos, install.packages, NULL)
        lapply(std.pkgs, std.installer)
        """ % (_as_r_strings(config['cran']))
        env.safe_append(out_file, cran_section)
    if len(config.get("bioc") or []) > 0:
        bioc_section = """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite, NULL)
        lapply(bioc.pkgs, bioc.installer)
        """ % (_as_r_strings(config['bioc']))
        env.safe_append(out_file, bioc_section)
    if config.get("cran-after-bioc"):
        # CRAN packages listed for installation after the Bioconductor ones.
        late_cran_section = """
        std2.pkgs <- c(%s)
        lapply(std2.pkgs, std.installer)
        """ % (_as_r_strings(config['cran-after-bioc']))
        env.safe_append(out_file, late_cran_section)
    if config.get("github"):
        github_section = """
        library(devtools)
        github.pkgs <- c(%s)
        get_pkg_name <- function(orig) {
          c(pkg=unlist(strsplit(unlist(strsplit(orig, "/"))[2], "@"))[1],
            version=unlist(strsplit(orig, ";"))[2],
            pname=unlist(strsplit(orig, ";"))[1])
        }
        gh_install <- function(name) {
          install_github(name, upgrade_dependencies=FALSE)
        }
        github_installer = repo.installer(NULL, gh_install, get_pkg_name)
        lapply(github.pkgs, github_installer)
        """ % (_as_r_strings(config['github']))
        env.safe_append(out_file, github_section)
コード例 #52
0
def _download_cosmic(gid):
    base_url = "http://www.broadinstitute.org/cancer/cga/sites/default/files/data/tools/mutect/"
    base_name = "b37_cosmic_v54_120711.vcf"
    if gid in ["GRCh37"] and not env.safe_exists(base_name):
        shared._remote_fetch(env, "{0}/{1}".format(base_url, base_name))