Example #1
0
def r_library_installer(config):
    """Install the R packages requested in ``config``.

    Nothing happens unless ``config`` has a truthy ``cran``, ``bioc`` or
    ``github`` entry.  Otherwise an install script is generated inside a
    temporary directory and run with the first usable ``Rscript``; a warning
    is logged when none is found.
    """
    wants_packages = (config.get("cran") or config.get("bioc")
                      or config.get("github"))
    if not wants_packages:
        return
    with shared._make_tmp_dir() as tmp_dir:
        with cd(tmp_dir):
            script_path = os.path.join(tmp_dir, "install_packages.R")
            _make_install_script(script_path, config)
            # Probe every candidate up front: the Rscript located next to the
            # conda executable (when conda is available) is tried first, then
            # the generic lookup.
            candidates = []
            conda_bin = shared._conda_cmd(env)
            if conda_bin:
                conda_rscript = os.path.join(os.path.dirname(conda_bin), "Rscript")
                candidates.append(fabutils.find_cmd(env, conda_rscript, "--version"))
            candidates.append(fabutils.find_cmd(env, "Rscript", "--version"))
            usable = [found for found in candidates if found]
            if usable:
                env.safe_run("%s %s" % (usable[0], script_path))
            else:
                env.logger.warn("Rscript not found; skipping install of R libraries.")
            # The script is single-use; remove it after the attempt.
            env.safe_run("rm -f %s" % script_path)
Example #2
0
def r_library_installer(config):
    """Install the R packages requested in ``config``.

    Does nothing unless ``config`` has a truthy ``cran``, ``bioc`` or
    ``github`` entry.  Otherwise an install script is written to a temporary
    directory and executed with the first ``Rscript`` that can be located; a
    warning is logged if there is none.
    """
    if not (config.get("cran") or config.get("bioc") or config.get("github")):
        return
    with shared._make_tmp_dir() as tmp_dir:
        with cd(tmp_dir):
            script_path = os.path.join(tmp_dir, "install_packages.R")
            _make_install_script(script_path, config)
            # All lookups happen before any candidate is used: first the
            # Rscript beside the conda executable (if conda is present),
            # then the generic lookup.
            found_rscripts = []
            conda_bin = shared._conda_cmd(env)
            if conda_bin:
                beside_conda = os.path.join(os.path.dirname(conda_bin), "Rscript")
                found_rscripts.append(
                    fabutils.find_cmd(env, beside_conda, "--version"))
            found_rscripts.append(fabutils.find_cmd(env, "Rscript", "--version"))
            chosen = next((r for r in found_rscripts if r), None)
            if chosen:
                env.safe_run("%s %s" % (chosen, script_path))
            else:
                env.logger.warn(
                    "Rscript not found; skipping install of R libraries.")
            # The script is single-use; remove it after the attempt.
            env.safe_run("rm -f %s" % script_path)
Example #3
0
 def _select_by_gid(env, orig_file):
     """Filter ``orig_file`` into ``out_file`` according to the genome build.

     ``gid`` and ``out_file`` are not parameters; they are resolved from the
     surrounding scope.  For hg19 only lines starting with ``chr`` are kept,
     for GRCh37 only the other lines.  Returns ``out_file``.
     """
     if gid != "hg19":
         assert gid == "GRCh37"
         grep_template = "grep -v ^chr %s > %s"
     else:
         grep_template = "grep ^chr %s > %s"
     env.safe_run(grep_template % (orig_file, out_file))
     return out_file
Example #4
0
 def _select_by_gid(env, orig_file):
     """Write the lines of ``orig_file`` matching the genome build to ``out_file``.

     ``gid`` and ``out_file`` are looked up in the surrounding scope rather
     than passed in.  hg19 keeps the lines beginning with ``chr``; GRCh37
     keeps every other line.  Returns ``out_file``.
     """
     keep_chr_prefixed = gid == "hg19"
     if not keep_chr_prefixed:
         assert gid == "GRCh37"
     grep_cmd = "grep ^chr %s > %s" if keep_chr_prefixed else "grep -v ^chr %s > %s"
     env.safe_run(grep_cmd % (orig_file, out_file))
     return out_file
Example #5
0
def _download_background_vcf(gid):
    """Download the background variant file used in calling, plus its index.

    Only GRCh37 is handled, and nothing is fetched when the ``.gz`` file is
    already present.
    """
    if gid not in ["GRCh37"]:
        return
    vcf_name = "background-diversity-1000g.vcf"
    if env.safe_exists("{0}.gz".format(vcf_name)):
        return
    remote_dir = "https://s3.amazonaws.com/biodata/variants"
    # The compressed VCF and its .tbi companion are fetched together.
    for suffix in ("gz", "gz.tbi"):
        env.safe_run("wget -c {0}/{1}.{2}".format(remote_dir, vcf_name, suffix))
Example #6
0
 def _fix_chrom_names(env, orig_file):
     """Re-compress ``orig_file`` with bgzip into ``out_file``.

     ``gid`` and ``out_file`` are resolved from the surrounding scope.  For
     hg19, lines starting with ``GL``, ``NC`` or ``hs`` are dropped and
     ``chr`` is prepended to each remaining line; GRCh37 content passes
     through unchanged.  Returns ``out_file``.
     """
     if gid != "hg19":
         assert gid == "GRCh37"
         rename_step = ""
     else:
         rename_step = "| grep -v ^GL | grep -v ^NC | grep -v ^hs | sed 's/^/chr/'"
     pipeline = "zcat %s %s | bgzip -c > %s" % (orig_file, rename_step, out_file)
     env.safe_run(pipeline)
     return out_file
Example #7
0
 def _fix_chrom_names(env, orig_file):
     """Stream ``orig_file`` through bgzip into ``out_file``.

     ``gid`` and ``out_file`` come from the surrounding scope.  hg19 drops
     lines starting with ``GL``, ``NC`` or ``hs`` and prefixes the rest with
     ``chr``; GRCh37 applies no conversion.  Returns ``out_file``.
     """
     hg19_conversion = "| grep -v ^GL | grep -v ^NC | grep -v ^hs | sed 's/^/chr/'"
     needs_conversion = gid == "hg19"
     if not needs_conversion:
         assert gid == "GRCh37"
     convert_cmd = hg19_conversion if needs_conversion else ""
     env.safe_run("zcat %s %s | bgzip -c > %s" % (orig_file, convert_cmd, out_file))
     return out_file
Example #8
0
def _create_local_virtualenv(target_dir):
    """Bootstrap a virtualenv in ``target_dir`` for non-sudo installs.

    Returns immediately when ``target_dir/bin/python`` already exists.
    Otherwise ``virtualenv.py`` is downloaded into a temporary directory and
    run against ``target_dir``.
    """
    python_bin = os.path.join(target_dir, "bin", "python")
    if os.path.exists(python_bin):
        return
    bootstrap_url = "https://raw.github.com/pypa/virtualenv/master/virtualenv.py"
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            # NOTE(review): the script is fetched without certificate
            # verification and then executed.
            env.safe_run("wget --no-check-certificate %s" % bootstrap_url)
            env.safe_run("python virtualenv.py %s" % target_dir)
Example #9
0
def _create_local_virtualenv(target_dir):
    """Create a virtualenv in ``target_dir`` for non-sudo installs.

    A no-op when ``target_dir/bin/python`` is already there.  Otherwise the
    ``virtualenv.py`` bootstrap script is downloaded to a temporary working
    directory and executed with ``target_dir`` as its argument.
    """
    already_there = os.path.exists(os.path.join(target_dir, "bin", "python"))
    if already_there:
        return
    script_url = "https://raw.github.com/pypa/virtualenv/master/virtualenv.py"
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            # NOTE(review): download skips certificate verification and the
            # result is executed right away.
            env.safe_run("wget --no-check-certificate %s" % script_url)
            env.safe_run("python virtualenv.py %s" % target_dir)
Example #10
0
def _dbsnp_mouse(env, gid):
    """Retrieve resources for mouse variant analysis from custom S3 biodata bucket.

    For each VCF registered for ``gid`` (only ``mm10`` is listed) the file
    and its ``.idx`` companion are fetched as ``.gz`` and decompressed,
    unless the uncompressed file already exists.
    """
    bucket = "https://s3.amazonaws.com/biodata/variants/"
    vcf_by_genome = {"mm10": ["mm10-dbSNP-2013-09-12.vcf"]}
    wanted = [vcf + suffix
              for vcf in vcf_by_genome[gid]
              for suffix in ("", ".idx")]
    for target in wanted:
        if env.safe_exists(target):
            continue
        fetched = shared._remote_fetch(env, "%s%s.gz" % (bucket, target))
        env.safe_run("gunzip %s" % fetched)
Example #11
0
def _dbsnp_mouse(env, gid):
    """Retrieve resources for mouse variant analysis from custom S3 biodata bucket.

    Each VCF listed for ``gid`` (only ``mm10`` has an entry) is fetched with
    its ``.idx`` file; both arrive gzipped and are decompressed.  Files that
    already exist are skipped.
    """
    remote_dir = "https://s3.amazonaws.com/biodata/variants/"
    available = {"mm10": ["mm10-dbSNP-2013-09-12.vcf"]}
    for vcf_name in available[gid]:
        for local_name in (vcf_name, vcf_name + ".idx"):
            if env.safe_exists(local_name):
                continue
            remote_url = "%s%s.gz" % (remote_dir, local_name)
            downloaded = shared._remote_fetch(env, remote_url)
            env.safe_run("gunzip %s" % downloaded)
Example #12
0
def _dbsnp_mouse(env, gid):
    """Retrieve resources for mouse variant analysis from custom S3 biodata bucket.

    Each VCF listed for ``gid`` (only ``mm10`` has an entry) and its ``.idx``
    file are downloaded with wget as ``.gz`` and decompressed in place.
    Files that already exist are skipped.
    """
    bucket = "https://s3.amazonaws.com/biodata/variants/"
    vcf_by_genome = {"mm10": ["mm10-dbSNP-2013-09-12.vcf"]}
    wanted = [vcf + suffix
              for vcf in vcf_by_genome[gid]
              for suffix in ("", ".idx")]
    for target in wanted:
        if env.safe_exists(target):
            continue
        url = "%s%s.gz" % (bucket, target)
        archive = os.path.basename(url)
        env.safe_run("wget -O %s -c %s" % (archive, url))
        env.safe_run("gunzip %s" % archive)
Example #13
0
def _make_install_script(out_file, config):
    """Write an R script to ``out_file`` that installs the configured packages.

    ``out_file`` is recreated empty and then appended to in order:
    repository setup, a ``repo.installer`` helper, and install calls for the
    ``cran``, ``bioc`` and ``cran-after-bioc`` package lists.  The R
    site-library directory under ``env.system_install`` is created and a
    best-effort ``chown`` to ``env.user`` is attempted.

    ``config`` must provide ``cranrepo`` and ``biocrepo``.  All three package
    lists are optional; ``update_packages`` (default ``True``) controls
    whether the helper calls ``update.packages``.
    """
    def _r_names(pkgs):
        # Render package names as the arguments of an R ``c(...)`` call.
        return ", ".join('"%s"' % p for p in pkgs)

    # A missing or empty list is treated as "no packages" so that a config
    # with only Bioconductor packages does not fail on the CRAN lookup.
    cran_pkgs = config.get("cran") or []
    bioc_pkgs = config.get("bioc") or []
    late_cran_pkgs = config.get("cran-after-bioc") or []

    if env.safe_exists(out_file):
        env.safe_run("rm -f %s" % out_file)
    env.safe_run("touch %s" % out_file)
    lib_loc = os.path.join(env.system_install, "lib", "R", "site-library")
    env.safe_sudo("mkdir -p %s" % lib_loc)
    # Ownership change is best effort: a failure only produces a warning.
    with settings(warn_only=True):
        env.safe_sudo("chown -R %s %s" % (env.user, lib_loc))
    repo_info = """
    .libPaths(c("%s"))
    library(methods)
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (lib_loc, config["cranrepo"], config["biocrepo"])
    env.safe_append(out_file, repo_info)
    install_fn = """
    repo.installer <- function(repos, install.fn) {
      %s
      maybe.install <- function(pname) {
        if (!(pname %%in%% installed.packages()))
          install.fn(pname)
      }
    }
    """
    if config.get("update_packages", True):
        update_str = """
        update.packages(lib.loc="%s", repos=repos, ask=FALSE)
        """ % lib_loc
    else:
        update_str = "\n"
    env.safe_append(out_file, install_fn % update_str)
    # The CRAN section is always written because it defines std.installer,
    # which the cran-after-bioc section reuses.
    std_install = """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages)
    lapply(std.pkgs, std.installer)
    """ % _r_names(cran_pkgs)
    env.safe_append(out_file, std_install)
    if bioc_pkgs:
        bioc_install = """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite)
        lapply(bioc.pkgs, bioc.installer)
        """ % _r_names(bioc_pkgs)
        env.safe_append(out_file, bioc_install)
    if late_cran_pkgs:
        std2_install = """
        std2.pkgs <- c(%s)
        lapply(std2.pkgs, std.installer)
        """ % _r_names(late_cran_pkgs)
        env.safe_append(out_file, std2_install)
Example #14
0
def download_dbnsfp(genomes):
    """Back compatible download target for dbNSFP, to be moved to GGD recipes.

    ``genomes`` yields ``(orgname, gid, manager)`` triples.  Only hg19 and
    GRCh37 entries whose manager config has a truthy ``dbnsfp`` value are
    processed; each gets its ``variation`` directory created if needed
    before the download runs inside it.
    """
    supported = set(["hg19", "GRCh37"])
    genome_root = os.path.join(env.data_files, "genomes")
    for orgname, gid, manager in genomes:
        if gid not in supported or not manager.config.get("dbnsfp"):
            continue
        target_dir = os.path.join(genome_root, orgname, gid, "variation")
        if not env.safe_exists(target_dir):
            env.safe_run('mkdir -p %s' % target_dir)
        with cd(target_dir):
            _download_dbnsfp(env, gid, manager.config)
Example #15
0
def download_dbnsfp(genomes):
    """Back compatible download target for dbNSFP, to be moved to GGD recipes.

    Iterates over ``(orgname, gid, manager)`` triples and handles only the
    hg19 / GRCh37 ones whose manager config enables ``dbnsfp``.  The
    per-genome ``variation`` directory is created when missing, then the
    download is run from inside it.
    """
    def _wanted(gid, manager):
        # Membership is checked first so the config is only consulted for
        # supported genome builds.
        return gid in set(["hg19", "GRCh37"]) and manager.config.get("dbnsfp")

    genome_dir = os.path.join(env.data_files, "genomes")
    for orgname, gid, manager in genomes:
        if not _wanted(gid, manager):
            continue
        vrn_dir = os.path.join(genome_dir, orgname, gid, "variation")
        if not env.safe_exists(vrn_dir):
            env.safe_run('mkdir -p %s' % vrn_dir)
        with cd(vrn_dir):
            _download_dbnsfp(env, gid, manager.config)
Example #16
0
def _download_lcrs_custom(env, gid):
    """Retrieve low complexity regions from other sources.

    mm10 from Brent Pedersen: http://figshare.com/articles/LCR_mm10_bed_gz/1180124

    The download is re-compressed with bgzip into ``LCR.bed.gz`` and indexed
    with tabix.  Nothing happens for genomes without a registered URL or
    when ``LCR.bed.gz`` already exists.
    """
    urls = {"mm10": "http://files.figshare.com/1688228/LCR_mm10.bed.gz"}
    out_file = "LCR.bed.gz"
    cur_url = urls.get(gid)
    if not cur_url or env.safe_exists(out_file):
        return

    def _bgzip_file(env, orig_file):
        # Passed to _remote_fetch as fix_fn: rewrites the download as bgzip
        # output under the final file name.
        env.safe_run("zcat %s | bgzip -c > %s" % (orig_file, out_file))
        return out_file

    shared._remote_fetch(env, cur_url, fix_fn=_bgzip_file)
    # The file is BED, so it must be indexed with the bed preset; the vcf
    # preset interprets the columns with VCF coordinate rules.
    env.safe_run("tabix -p bed -f %s" % out_file)
Example #17
0
def download_dbsnp(genomes, bundle_version, dbsnp_version):
    """Download and install dbSNP variation data for supplied genomes.

    ``genomes`` yields ``(orgname, gid, manager)`` triples; entries whose
    manager config lacks a truthy ``dbsnp`` value are skipped.  Human builds
    (GRCh37, hg19) and custom builds (mm10, canFam3) use different
    downloaders; other builds only get their ``variation`` directory created.
    """
    human_builds = ["GRCh37", "hg19"]
    custom_builds = ["mm10", "canFam3"]
    genome_root = os.path.join(env.data_files, "genomes")
    for orgname, gid, manager in genomes:
        if not manager.config.get("dbsnp", False):
            continue
        target_dir = os.path.join(genome_root, orgname, gid, "variation")
        if not env.safe_exists(target_dir):
            env.safe_run("mkdir -p %s" % target_dir)
        with cd(target_dir):
            if gid in human_builds:
                _dbsnp_human(env, gid, manager, bundle_version, dbsnp_version)
            elif gid in custom_builds:
                _dbsnp_custom(env, gid)
Example #18
0
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Fetch one VCF from the Broad bundle FTP site and return its local name.

    The local name is the Broad file name with the ``.<gid>`` and ``.sites``
    parts removed.  If that file is missing, the gzipped remote file is
    fetched (failure allowed), decompressed and renamed; a failed fetch only
    logs a warning.  The local name is returned in every case.
    """
    broad_fname = "{name}.{gid}.vcf{ext}".format(gid=gid, name=name, ext=ext)
    fname = broad_fname.replace(".{0}".format(gid), "").replace(".sites", "")
    if env.safe_exists(fname):
        return fname
    remote_path = "{bundle}/{gid}/{fname}.gz".format(
        bundle=bundle_version, fname=broad_fname, gid=gid)
    base_url = ("ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/"
                + remote_path)
    fetched = shared._remote_fetch(env, base_url, allow_fail=True)
    if not fetched:
        env.logger.warn("dbSNP resources not available for %s" % gid)
    else:
        env.safe_run("gunzip %s" % fetched)
        env.safe_run("mv %s %s" % (broad_fname, fname))
    return fname
Example #19
0
def _download_lcrs_custom(env, gid):
    """Retrieve low complexity regions from other sources.

    mm10 from Brent Pedersen: http://figshare.com/articles/LCR_mm10_bed_gz/1180124

    The download is re-compressed with bgzip into ``LCR.bed.gz`` and indexed
    with tabix.  Nothing happens for genomes without a registered URL or
    when ``LCR.bed.gz`` already exists.
    """
    urls = {"mm10": "http://files.figshare.com/1688228/LCR_mm10.bed.gz"}
    out_file = "LCR.bed.gz"
    cur_url = urls.get(gid)
    if not cur_url or env.safe_exists(out_file):
        return

    def _bgzip_file(env, orig_file):
        # Passed to _remote_fetch as fix_fn: rewrites the download as bgzip
        # output under the final file name.
        env.safe_run("zcat %s | bgzip -c > %s" % (orig_file, out_file))
        return out_file

    shared._remote_fetch(env, cur_url, fix_fn=_bgzip_file)
    # The file is BED, so it must be indexed with the bed preset; the vcf
    # preset interprets the columns with VCF coordinate rules.
    env.safe_run("tabix -p bed -f %s" % out_file)
Example #20
0
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Download a VCF from the Broad resource bundle; return the local name.

    The local file drops the ``.<gid>`` and ``.sites`` parts of the Broad
    file name.  When it does not exist yet, the gzipped remote copy is
    fetched with failures tolerated, then decompressed and renamed.  A
    failed fetch is reported through a warning and the name is still
    returned.
    """
    ftp_root = "ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/"
    broad_fname = "{name}.{gid}.vcf{ext}".format(gid=gid, name=name, ext=ext)
    gid_part = ".{0}".format(gid)
    fname = broad_fname.replace(gid_part, "").replace(".sites", "")
    base_url = ftp_root + "{bundle}/{gid}/{fname}.gz".format(
        bundle=bundle_version, fname=broad_fname, gid=gid)
    if not env.safe_exists(fname):
        downloaded = shared._remote_fetch(env, base_url, allow_fail=True)
        if downloaded:
            for command in ("gunzip %s" % downloaded,
                            "mv %s %s" % (broad_fname, fname)):
                env.safe_run(command)
        else:
            env.logger.warn("dbSNP resources not available for %s" % gid)
    return fname
Example #21
0
def _make_install_script(out_file, config):
    """Write an R script to ``out_file`` that installs the configured packages.

    ``out_file`` is recreated empty and then appended to in order:
    repository setup, a ``repo.installer`` helper, and install calls for the
    ``cran``, ``bioc`` and ``cran-after-bioc`` package lists.  The R
    site-library directory under ``env.system_install`` is created first.

    ``config`` must provide ``cranrepo`` and ``biocrepo``.  All three package
    lists are optional; ``update_packages`` (default ``True``) controls
    whether the helper calls ``update.packages``.
    """
    def _r_names(pkgs):
        # Render package names as the arguments of an R ``c(...)`` call.
        return ", ".join('"%s"' % p for p in pkgs)

    # A missing or empty list is treated as "no packages" so that a config
    # with only Bioconductor packages does not fail on the CRAN lookup.
    cran_pkgs = config.get("cran") or []
    bioc_pkgs = config.get("bioc") or []
    late_cran_pkgs = config.get("cran-after-bioc") or []

    if env.safe_exists(out_file):
        env.safe_run("rm -f %s" % out_file)
    env.safe_run("touch %s" % out_file)
    lib_loc = os.path.join(env.system_install, "lib", "R", "site-library")
    env.safe_sudo("mkdir -p %s" % lib_loc)
    repo_info = """
    .libPaths(c("%s"))
    library(methods)
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (lib_loc, config["cranrepo"], config["biocrepo"])
    env.safe_append(out_file, repo_info)
    install_fn = """
    repo.installer <- function(repos, install.fn) {
      %s
      maybe.install <- function(pname) {
        if (!(pname %%in%% installed.packages()))
          install.fn(pname)
      }
    }
    """
    if config.get("update_packages", True):
        update_str = """
        update.packages(lib.loc="%s", repos=repos, ask=FALSE)
        """ % lib_loc
    else:
        update_str = "\n"
    env.safe_append(out_file, install_fn % update_str)
    # The CRAN section is always written because it defines std.installer,
    # which the cran-after-bioc section reuses.
    std_install = """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages)
    lapply(std.pkgs, std.installer)
    """ % _r_names(cran_pkgs)
    env.safe_append(out_file, std_install)
    if bioc_pkgs:
        bioc_install = """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite)
        lapply(bioc.pkgs, bioc.installer)
        """ % _r_names(bioc_pkgs)
        env.safe_append(out_file, bioc_install)
    if late_cran_pkgs:
        std2_install = """
        std2.pkgs <- c(%s)
        lapply(std2.pkgs, std.installer)
        """ % _r_names(late_cran_pkgs)
        env.safe_append(out_file, std2_install)
Example #22
0
def _ensembl_vcf(env, gid, manager):
    """Fetch the Ensembl VCF file (available from release 71) and tabix-index it.

    The URL is built from the manager's base URL, release number and
    organism.  For the ``standard`` section the file lives under
    ``variation/vcf/`` and keeps the organism's capitalisation; for any
    other section the ``variation/`` prefix is dropped and the file name is
    lower-cased.  Nothing is done when the file already exists.
    """
    organism = manager._organism
    fname = "%s.vcf.gz" % organism
    # Compare by value: ``is`` tests object identity, which is not
    # guaranteed for equal strings.
    if manager._section != "standard":
        section = ""
        fname = fname.lower()
    else:
        section = "variation/"
    download_url = manager._base_url + "release-%s/%svcf/%s/%s" % (
        manager._release_number, section, organism.lower(), fname)
    if not env.safe_exists(fname):
        shared._remote_fetch(env, download_url)
        env.safe_run("tabix -f -p vcf %s" % fname)
Example #23
0
def r_library_installer(config):
    """Generate an R install script from ``config`` and run it with Rscript.

    The script is written inside a temporary directory and deleted after the
    attempt.  When no ``Rscript`` can be located a warning is logged and the
    install is skipped.
    """
    with shared._make_tmp_dir() as tmp_dir:
        with cd(tmp_dir):
            script_path = os.path.join(tmp_dir, "install_packages.R")
            _make_install_script(script_path, config)
            rscript = fabutils.find_cmd(env, "Rscript", "--version")
            if not rscript:
                env.logger.warn("Rscript not found; skipping install of R libraries.")
            else:
                env.safe_run("%s %s" % (rscript, script_path))
            # The script is single-use; remove it after the attempt.
            env.safe_run("rm -f %s" % script_path)
Example #24
0
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Fetch one VCF from the Broad bundle FTP site and return its local name.

    The local name is the Broad file name with the ``.<gid>`` and ``.sites``
    parts removed.  If that file is missing, the gzipped remote file is
    downloaded with wget under ``warn_only``; on success it is decompressed
    and renamed, otherwise a warning is logged.  The local name is returned
    in every case.
    """
    broad_fname = "{name}.{gid}.vcf{ext}".format(gid=gid, name=name, ext=ext)
    fname = broad_fname.replace(".{0}".format(gid), "").replace(".sites", "")
    if env.safe_exists(fname):
        return fname
    remote_path = "{bundle}/{gid}/{fname}.gz".format(
        bundle=bundle_version, fname=broad_fname, gid=gid)
    base_url = ("ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/"
                + remote_path)
    # A failed download must not abort the run, hence warn_only.
    with warn_only():
        dl = env.safe_run("wget -c %s" % base_url)
    if not dl.succeeded:
        env.logger.warn("dbSNP resources not available for %s" % gid)
    else:
        env.safe_run("gunzip %s" % os.path.basename(base_url))
        env.safe_run("mv %s %s" % (broad_fname, fname))
    return fname
Example #25
0
 def _install_modules_configure_make(self, env):
     """
     Configure, build and install into ``env.system_install``.

     Differences from standard _configure_make():
         - TODO: ./configure with destination modulefile directory on shared filesystem
         - add modules to profile for all users

     After ``make install`` the profile script is copied to
     ``/etc/profile.d/modules.sh`` and ``Modules/default`` is symlinked to
     the directory named by ``env.environment_modules_version``.
     """
     # currently putting module files in directory structure under env.system_install
     # it would be better to store them on a filesystem shared with worker nodes; this is harder
     prefix = env.system_install
     env.safe_run("export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:%s/lib/pkgconfig && "
                  "./configure --prefix=%s " % (prefix, prefix))
     # Go through the env wrapper like every other command in this method
     # instead of calling the bare ``run``.
     env.safe_run('make')
     env.safe_sudo('make install')
     env.safe_sudo("cp etc/global/profile.modules /etc/profile.d/modules.sh")
     modules_root = os.path.join(prefix, 'Modules')
     version_dir = os.path.join(modules_root, env.environment_modules_version)
     default_link = os.path.join(modules_root, 'default')
     env.safe_sudo("ln -s {0} {1}".format(version_dir, default_link))
Example #26
0
def download_dbsnp(genomes, bundle_version, dbsnp_version):
    """Download and install dbSNP variation data for supplied genomes.

    Walks the ``(orgname, gid, manager)`` triples in ``genomes`` and skips
    those whose manager config has no truthy ``dbsnp`` value.  The genome's
    ``variation`` directory is created when missing; inside it GRCh37/hg19
    use the human downloader and mm10/canFam3 the custom one.
    """
    genome_dir = os.path.join(env.data_files, "genomes")
    enabled = [entry for entry in genomes if entry[2].config.get("dbsnp", False)]
    for orgname, gid, manager in enabled:
        vrn_dir = os.path.join(genome_dir, orgname, gid, "variation")
        if not env.safe_exists(vrn_dir):
            env.safe_run('mkdir -p %s' % vrn_dir)
        with cd(vrn_dir):
            if gid in ("GRCh37", "hg19"):
                _dbsnp_human(env, gid, manager, bundle_version, dbsnp_version)
            elif gid in ("mm10", "canFam3"):
                _dbsnp_custom(env, gid)
Example #27
0
def _download_lcrs(gid):
    """Retrieve low complexity regions from Heng Li's variant analysis paper.

    The BED file is fetched, rewritten with bgzip into ``LCR.bed.gz`` (with
    chromosome names converted for hg19) and indexed with tabix.  Nothing is
    done when ``LCR.bed.gz`` already exists.
    """
    lcr_url = "https://github.com/lh3/varcmp/raw/master/scripts/LCR-hs37d5.bed.gz"
    out_file = "LCR.bed.gz"
    if env.safe_exists(out_file):
        return

    def _fix_chrom_names(env, orig_file):
        # Passed to _remote_fetch as fix_fn.  For hg19, lines starting with
        # GL, NC or hs are dropped and "chr" is prepended to the rest;
        # GRCh37 is passed through unchanged.
        if gid == "hg19":
            convert_cmd = "| grep -v ^GL | grep -v ^NC | grep -v ^hs | sed 's/^/chr/'"
        else:
            assert gid == "GRCh37"
            convert_cmd = ""
        env.safe_run("zcat %s %s | bgzip -c > %s" % (orig_file, convert_cmd, out_file))
        return out_file

    shared._remote_fetch(env, lcr_url, fix_fn=_fix_chrom_names)
    # The file is BED, so it must be indexed with the bed preset; the vcf
    # preset interprets the columns with VCF coordinate rules.
    env.safe_run("tabix -p bed -f %s" % out_file)
Example #28
0
def _download_ancestral(env, gid, gconfig):
    """Download ancestral genome sequence for loss of function evaluation.

    Used by LOFTEE VEP plugin: https://github.com/konradjk/loftee

    GRCh37 fetches the sequence and its ``.fai`` index; hg19 symlinks to the
    GRCh37 copies instead.  Existing files are left alone and other genome
    builds are ignored.
    """
    base_url = "http://www.broadinstitute.org/~konradk/loftee/human_ancestor.fa.rz"
    if gid not in ("GRCh37", "hg19"):
        return
    for ext in ["", ".fai"]:
        outfile = os.path.basename(base_url) + ext
        if env.safe_exists(outfile):
            continue
        if gid == "GRCh37":
            shared._remote_fetch(env, base_url + ext, samedir=True)
        else:
            # hg19: symlink to GRCh37 download
            env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (outfile, outfile))
Example #29
0
def _configure_and_install_native_packages(env, pkg_install):
    """
    Set up native package repositories and install the packages selected by
    ``pkg_install`` for the target distribution.

    A ``~``-prefixed ``env.shell_config`` is first rewritten relative to the
    reported ``$HOME``.  Raises NotImplementedError for a distribution that
    is not recognised.
    """
    home_dir = env.safe_run("echo $HOME")
    if home_dir and env.shell_config.startswith("~"):
        relative = env.shell_config.split("~/", 1)[-1]
        env.shell_config = os.path.join(home_dir, relative)

    distro = env.distribution
    if distro in ("debian", "ubuntu"):
        _setup_apt_sources()
        _setup_apt_automation()
        _add_apt_gpg_keys()
        _apt_packages(pkg_install)
    elif distro in ("centos", "scientificlinux"):
        _setup_yum_sources()
        _yum_packages(pkg_install)
        # The minimal edition skips the bashrc setup.
        if env.edition.short_name not in ("minimal",):
            _setup_yum_bashrc()
    elif distro == "arch":
        pass  # No package support for Arch yet
    elif distro == "macosx":
        brew.install_packages(env, pkg_install)
    else:
        raise NotImplementedError("Unknown target distribution")
Example #30
0
def _download_ancestral(env, gid, gconfig):
    """Download ancestral genome sequence for loss of function evaluation.

    Used by LOFTEE VEP plugin: https://github.com/konradjk/loftee

    For GRCh37 the sequence file and its ``.fai`` index are fetched; for
    hg19 they are symlinked from the GRCh37 ``variation`` directory.  Files
    that already exist are skipped, and any other genome build is a no-op.
    """
    base_url = "http://www.broadinstitute.org/~konradk/loftee/human_ancestor.fa.rz"
    local_base = os.path.basename(base_url)

    def _missing_targets():
        # Yield (extension, local file name) for files not yet present.
        for ext in ["", ".fai"]:
            outfile = local_base + ext
            if not env.safe_exists(outfile):
                yield ext, outfile

    if gid == "GRCh37":
        for ext, outfile in _missing_targets():
            shared._remote_fetch(env, base_url + ext, samedir=True)
    elif gid == "hg19":  # symlink to GRCh37 download
        for ext, outfile in _missing_targets():
            env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (outfile, outfile))
Example #31
0
def _download_lcrs(gid):
    """Retrieve low complexity regions from Heng Li's variant analysis paper.

    The BED file is fetched, rewritten with bgzip into ``LCR.bed.gz`` (with
    chromosome names converted for hg19) and indexed with tabix.  Nothing is
    done when ``LCR.bed.gz`` already exists.
    """
    lcr_url = "https://github.com/lh3/varcmp/raw/master/scripts/LCR-hs37d5.bed.gz"
    out_file = "LCR.bed.gz"
    if env.safe_exists(out_file):
        return

    def _fix_chrom_names(env, orig_file):
        # Passed to _remote_fetch as fix_fn.  For hg19, lines starting with
        # GL, NC or hs are dropped and "chr" is prepended to the rest;
        # GRCh37 is passed through unchanged.
        if gid == "hg19":
            convert_cmd = "| grep -v ^GL | grep -v ^NC | grep -v ^hs | sed 's/^/chr/'"
        else:
            assert gid == "GRCh37"
            convert_cmd = ""
        env.safe_run("zcat %s %s | bgzip -c > %s" % (orig_file, convert_cmd, out_file))
        return out_file

    shared._remote_fetch(env, lcr_url, fix_fn=_fix_chrom_names)
    # The file is BED, so it must be indexed with the bed preset; the vcf
    # preset interprets the columns with VCF coordinate rules.
    env.safe_run("tabix -p bed -f %s" % out_file)
Example #32
0
def _configure_and_install_native_packages(env, pkg_install):
    """
    Set up native package repositories and install the packages selected by
    ``pkg_install`` for the target distribution.

    A ``~``-prefixed ``env.shell_config`` is first rewritten relative to the
    reported ``$HOME``.  Raises NotImplementedError for a distribution that
    is not recognised.
    """
    # NOTE(review): this import rebinds ``env``, so the fabric env is used
    # below rather than the object passed by the caller.
    from fabric.api import env
    from cloudbio.package import brew
    from cloudbio.package.deb import (_apt_packages, _add_apt_gpg_keys,
                                      _setup_apt_automation, _setup_apt_sources)
    from cloudbio.package.rpm import (_yum_packages, _setup_yum_bashrc,
                                      _setup_yum_sources)

    home_dir = env.safe_run("echo $HOME")
    if home_dir and env.shell_config.startswith("~"):
        relative = env.shell_config.split("~/", 1)[-1]
        env.shell_config = os.path.join(home_dir, relative)

    distro = env.distribution
    if distro in ("debian", "ubuntu"):
        _setup_apt_sources()
        _setup_apt_automation()
        _add_apt_gpg_keys()
        _apt_packages(pkg_install)
    elif distro in ("centos", "scientificlinux"):
        _setup_yum_sources()
        _yum_packages(pkg_install)
        _setup_yum_bashrc()
    elif distro in ("arch", "suse"):
        pass  # No package support for Arch, SUSE yet
    elif distro == "macosx":
        brew.install_packages(env, pkg_install)
    else:
        raise NotImplementedError("Unknown target distribution")
Example #33
0
def _configure_and_install_native_packages(env, pkg_install):
    """
    Set up native package repositories and install the packages selected by
    ``pkg_install`` for the target distribution.

    The distribution is logged at debug level, then a ``~``-prefixed
    ``env.shell_config`` is rewritten relative to the reported ``$HOME``.
    Raises NotImplementedError for a distribution that is not recognised.
    """
    env.logger.debug("Configure and install native packages for distribution: " + env.distribution)
    home_dir = env.safe_run("echo $HOME")
    if home_dir and env.shell_config.startswith("~"):
        relative = env.shell_config.split("~/", 1)[-1]
        env.shell_config = os.path.join(home_dir, relative)

    distro = env.distribution
    if distro in ("debian", "ubuntu"):
        _setup_apt_sources()
        _setup_apt_automation()
        _add_apt_gpg_keys()
        _apt_packages(pkg_install)
    elif distro in ("centos", "scientificlinux"):
        _setup_yum_sources()
        _yum_packages(pkg_install)
        # The minimal edition skips the bashrc setup.
        if env.edition.short_name not in ("minimal",):
            _setup_yum_bashrc()
    elif distro == "arch":
        pass  # No package support for Arch yet
    elif distro == "macosx":
        brew.install_packages(env, pkg_install)
    else:
        raise NotImplementedError("Unknown target distribution")
Example #34
0
def _download_cosmic(gid):
    """Prepared versions of COSMIC, pre-sorted and indexed.
    utils/prepare_cosmic.py handles the work of creating the VCFs from standard
    COSMIC resources.

    Only hg19 and GRCh37 are supported.  The gzipped VCF is fetched and
    decompressed unless the VCF already exists, and the ``.idx`` file is
    fetched separately when missing.
    """
    if gid not in ["hg19", "GRCh37"]:
        return
    base_url = "https://s3.amazonaws.com/biodata/variants"
    version = "v67_20131024"
    url = "%s/cosmic-%s-%s.vcf.gz" % (base_url, version, gid)
    gzip_fname = os.path.basename(url)
    fname, _gz_ext = os.path.splitext(gzip_fname)
    if not env.safe_exists(fname):
        if not env.safe_exists(gzip_fname):
            shared._remote_fetch(env, url)
        # NOTE(review): gunzip is given the name without ".gz" -- presumably
        # relying on gunzip locating the compressed file; confirm.
        env.safe_run("gunzip %s" % fname)
    index_fname = fname + ".idx"
    if not env.safe_exists(index_fname):
        shared._remote_fetch(env, url.replace(".gz", ".idx"))
Example #35
0
def find_cmd(env, cmd, args):
    """Locate a runnable ``cmd``, preferring the installation directory.

    The copy under ``env.system_install/bin`` is tried first, then ``cmd``
    as given.  Each candidate is run quietly with ``args``; the first one
    that succeeds is returned, or ``None`` when neither does.
    """
    candidates = (os.path.join(env.system_install, "bin", cmd), cmd)
    for candidate in candidates:
        with quiet():
            probe = env.safe_run("%s %s" % (candidate, args))
        if probe.succeeded:
            return candidate
    return None
Example #36
0
def find_cmd(env, cmd, args):
    """Return a working invocation path for ``cmd``, or ``None``.

    Two locations are probed in order: ``env.system_install/bin/<cmd>`` and
    then ``cmd`` itself.  A location counts as working when running it with
    ``args`` (output suppressed) succeeds.
    """
    def _works(path):
        # Run the candidate quietly and report whether it succeeded.
        with quiet():
            outcome = env.safe_run("%s %s" % (path, args))
        return outcome.succeeded

    installed_copy = os.path.join(env.system_install, "bin", cmd)
    for path in [installed_copy, cmd]:
        if _works(path):
            return path
    return None
Example #37
0
def local_sed(filename,
              before,
              after,
              limit='',
              use_sudo=False,
              backup='.bak',
              flags='',
              shell=False):
    """ Run a sed search-and-replace on ``filename`` with given regex patterns.

    From main fabric contrib, modified to handle local.

    ``before`` is replaced by ``after`` on every line, or only on lines
    matching ``limit`` when it is given.  ``flags`` is inserted before the
    trailing ``g`` of the sed expression, ``backup`` is the suffix of the
    backup copy, and ``use_sudo`` selects ``env.safe_sudo`` over
    ``env.safe_run``.  Returns the result of the command runner.
    """
    if use_sudo:
        runner = env.safe_sudo
    else:
        runner = env.safe_run
    # "/" delimits the sed expression and "'" quotes it on the command line,
    # so both are escaped in the pattern and in the replacement.
    for special in ("/", "'"):
        escaped = r'\%s' % special
        before = before.replace(special, escaped)
        after = after.replace(special, escaped)
    # Parentheses are escaped in the replacement only; in the pattern they
    # stay usable as regex syntax.
    for paren in ("(", ")"):
        after = after.replace(paren, r'\%s' % paren)
    if limit:
        limit = r'/%s/ ' % limit
    context = dict(
        script=r"'%ss/%s/%s/%sg'" % (limit, before, after, flags),
        filename='"$(echo %s)"' % filename,
        backup=backup,
    )
    # sed differs between operating systems, so ask which one this is.
    with hide('running', 'stdout'):
        platform = env.safe_run("uname")
    if platform not in ('NetBSD', 'OpenBSD', 'QNX'):
        context['extended_regex'] = '-E' if platform == 'Darwin' else '-r'
        expr = r"sed -i%(backup)s %(extended_regex)s -e %(script)s %(filename)s"
    else:
        # No -i on these platforms: write to a temp file whose name is
        # derived from host and file name to avoid collisions.
        hasher = hashlib.sha1()
        hasher.update(env.host_string)
        hasher.update(filename)
        context['tmp'] = "/tmp/%s" % hasher.hexdigest()
        expr = r"""cp -p %(filename)s %(tmp)s \
&& sed -r -e %(script)s %(filename)s > %(tmp)s \
&& cp -p %(filename)s %(filename)s%(backup)s \
&& mv %(tmp)s %(filename)s"""
    return runner(expr % context, shell=shell)
Example #38
0
def r_library_installer(config):
    """Install R libraries using CRAN and Bioconductor.

    An R script is written to ``install_packages.R`` in the current
    directory, run through sudo with the located ``Rscript`` and deleted
    afterwards.  When no ``Rscript`` is found a warning is logged and the
    install is skipped.

    ``config`` must provide ``cranrepo`` and ``biocrepo``.  The ``cran`` and
    ``bioc`` package lists are optional; ``update_packages`` (default
    ``True``) controls whether the script calls ``update.packages``.
    """
    def _r_names(pkgs):
        # Render package names as the arguments of an R ``c(...)`` call.
        return ", ".join('"%s"' % p for p in pkgs)

    # A missing or empty list is treated as "no packages" so that a config
    # with only Bioconductor packages does not fail on the CRAN lookup.
    cran_pkgs = config.get("cran") or []
    bioc_pkgs = config.get("bioc") or []

    # Create an Rscript file with install details.
    out_file = "install_packages.R"
    if env.safe_exists(out_file):
        env.safe_run("rm -f %s" % out_file)
    env.safe_run("touch %s" % out_file)
    lib_loc = os.path.join(env.system_install, "lib", "R", "site-library")
    env.safe_sudo("mkdir -p %s" % lib_loc)
    repo_info = """
    .libPaths(c("%s"))
    library(methods)
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (lib_loc, config["cranrepo"], config["biocrepo"])
    env.safe_append(out_file, repo_info)
    install_fn = """
    repo.installer <- function(repos, install.fn) {
      %s
      maybe.install <- function(pname) {
        if (!(pname %%in%% installed.packages()))
          install.fn(pname)
      }
    }
    """
    if config.get("update_packages", True):
        update_str = """
        update.packages(lib.loc="%s", repos=repos, ask=FALSE)
        """ % lib_loc
    else:
        update_str = "\n"
    env.safe_append(out_file, install_fn % update_str)
    std_install = """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages)
    lapply(std.pkgs, std.installer)
    """ % _r_names(cran_pkgs)
    env.safe_append(out_file, std_install)
    if bioc_pkgs:
        bioc_install = """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite)
        lapply(bioc.pkgs, bioc.installer)
        """ % _r_names(bioc_pkgs)
        env.safe_append(out_file, bioc_install)
    # run the script and then get rid of it
    rscript = fabutils.find_cmd(env, "Rscript", "--version")
    if rscript:
        env.safe_sudo("%s %s" % (rscript, out_file))
    else:
        env.logger.warn("Rscript not found; skipping install of R libraries.")
    env.safe_run("rm -f %s" % out_file)
Example #39
0
def r_library_installer(config):
    """Install R libraries using CRAN and Bioconductor.

    Writes a temporary ``install_packages.R`` script in the current working
    directory, runs it with ``Rscript`` via sudo when ``Rscript`` can be
    found, and removes the script afterwards.

    ``config`` is read as a dictionary with these keys:

    - ``cranrepo``: value assigned to the "CRAN" entry of R's ``repos`` option.
    - ``biocrepo``: location handed to R's ``source()``.
    - ``cran``: package names installed through ``install.packages``.
    - ``bioc``: package names installed through ``biocLite``.
    - ``update_packages``: when true (the default), the generated
      ``repo.installer`` function calls ``update.packages`` on the site
      library.

    A missing or ``None`` ``cran``/``bioc`` entry is treated as an empty list
    of packages instead of raising.
    """
    # An empty key in the configuration may come through as None; normalise
    # both package lists up front so the joins below never see a non-iterable.
    cran_pkgs = config.get("cran") or []
    bioc_pkgs = config.get("bioc") or []
    # Create an Rscript file with install details.
    out_file = "install_packages.R"
    if env.safe_exists(out_file):
        env.safe_run("rm -f %s" % out_file)
    env.safe_run("touch %s" % out_file)
    lib_loc = os.path.join(env.system_install, "lib", "R", "site-library")
    env.safe_sudo("mkdir -p %s" % lib_loc)
    repo_info = """
    .libPaths(c("%s"))
    library(methods)
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (lib_loc, config["cranrepo"], config["biocrepo"])
    env.safe_append(out_file, repo_info)
    # "%%in%%" is doubled because this template goes through %-formatting
    # below; it reaches the R script as the %in% operator.
    install_fn = """
    repo.installer <- function(repos, install.fn) {
      %s
      maybe.install <- function(pname) {
        if (!(pname %%in%% installed.packages()))
          install.fn(pname)
      }
    }
    """
    if config.get("update_packages", True):
        update_str = """
        update.packages(lib.loc="%s", repos=repos, ask=FALSE)
        """ % lib_loc
    else:
        update_str = "\n"
    env.safe_append(out_file, install_fn % update_str)
    std_install = """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages)
    lapply(std.pkgs, std.installer)
    """ % (", ".join('"%s"' % p for p in cran_pkgs))
    env.safe_append(out_file, std_install)
    if bioc_pkgs:
        bioc_install = """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite)
        lapply(bioc.pkgs, bioc.installer)
        """ % (", ".join('"%s"' % p for p in bioc_pkgs))
        env.safe_append(out_file, bioc_install)
    # run the script and then get rid of it
    rscript = fabutils.find_cmd(env, "Rscript", "--version")
    if rscript:
        env.safe_sudo("%s %s" % (rscript, out_file))
    else:
        env.logger.warn("Rscript not found; skipping install of R libraries.")
    env.safe_run("rm -f %s" % out_file)
Example #40
0
def local_sed(filename, before, after, limit='', use_sudo=False, backup='.bak',
              flags='', shell=False):
    """Run a search-and-replace on ``filename`` with given regex patterns.

    From main fabric contrib, modified to handle local.

    :param filename: file to edit; passed through ``$(echo ...)`` in the
        generated command.
    :param before: regular expression to search for. ``/`` and ``'`` are
        backslash-escaped.
    :param after: replacement text. ``/``, ``'``, ``(`` and ``)`` are
        backslash-escaped.
    :param limit: when non-empty, used as a ``/limit/`` sed address placed in
        front of the substitute command.
    :param use_sudo: run through ``env.safe_sudo`` instead of
        ``env.safe_run``.
    :param backup: suffix of the backup copy sed leaves behind.
    :param flags: extra sed flags placed in front of the trailing ``g``.
    :param shell: forwarded to the run function as ``shell=``.
    :returns: whatever the chosen run function returns.
    """
    func = env.safe_sudo if use_sudo else env.safe_run
    # Characters to be escaped in both
    for char in "/'":
        before = before.replace(char, r'\%s' % char)
        after = after.replace(char, r'\%s' % char)
    # Characters to be escaped in replacement only (they're useful in regexen
    # in the 'before' part)
    for char in "()":
        after = after.replace(char, r'\%s' % char)
    if limit:
        limit = r'/%s/ ' % limit
    context = {
        'script': r"'%ss/%s/%s/%sg'" % (limit, before, after, flags),
        'filename': '"$(echo %s)"' % filename,
        'backup': backup
    }
    # Test the OS because of differences between sed versions

    with hide('running', 'stdout'):
        platform = env.safe_run("uname")
    if platform in ('NetBSD', 'OpenBSD', 'QNX'):
        # Attempt to protect against failures/collisions
        hasher = hashlib.sha1()
        for part in (env.host_string, filename):
            # hashlib only accepts bytes, and no host may be set at all when
            # running locally, so coerce each component before hashing.
            if part is None:
                part = ""
            if not isinstance(part, bytes):
                part = part.encode("utf-8")
            hasher.update(part)
        context['tmp'] = "/tmp/%s" % hasher.hexdigest()
        # Use temp file to work around lack of -i
        expr = r"""cp -p %(filename)s %(tmp)s \
&& sed -r -e %(script)s %(filename)s > %(tmp)s \
&& cp -p %(filename)s %(filename)s%(backup)s \
&& mv %(tmp)s %(filename)s"""
    else:
        context['extended_regex'] = '-E' if platform == 'Darwin' else '-r'
        expr = r"sed -i%(backup)s %(extended_regex)s -e %(script)s %(filename)s"
    command = expr % context
    return func(command, shell=shell)
Example #41
0
def local_sed(filename, before, after, limit="", use_sudo=False, backup=".bak", flags="", shell=False):
    """Run a search-and-replace on ``filename`` with given regex patterns.

    From main fabric contrib, modified to handle local.

    Builds a ``sed`` command line and runs it with ``env.safe_sudo`` when
    ``use_sudo`` is true, otherwise with ``env.safe_run``; the result of that
    call is returned. ``before`` and ``after`` have ``/`` and ``'`` escaped,
    ``after`` additionally has parentheses escaped. A non-empty ``limit``
    becomes a ``/limit/`` address in front of the substitution, ``flags`` are
    placed before the trailing ``g`` and ``backup`` is the suffix of the
    backup copy. ``shell`` is forwarded unchanged to the run function.
    """
    func = env.safe_sudo if use_sudo else env.safe_run
    # Characters to be escaped in both
    for char in "/'":
        before = before.replace(char, r"\%s" % char)
        after = after.replace(char, r"\%s" % char)
    # Characters to be escaped in replacement only (they're useful in regexen
    # in the 'before' part)
    for char in "()":
        after = after.replace(char, r"\%s" % char)
    if limit:
        limit = r"/%s/ " % limit
    context = {
        "script": r"'%ss/%s/%s/%sg'" % (limit, before, after, flags),
        "filename": '"$(echo %s)"' % filename,
        "backup": backup,
    }
    # Test the OS because of differences between sed versions

    with hide("running", "stdout"):
        platform = env.safe_run("uname")
    if platform in ("NetBSD", "OpenBSD", "QNX"):
        # Attempt to protect against failures/collisions
        hasher = hashlib.sha1()
        # The digest input must be bytes; the host string can be None when no
        # remote host is configured, so fall back to an empty value for it.
        for component in (env.host_string or "", filename):
            if not isinstance(component, bytes):
                component = component.encode("utf-8")
            hasher.update(component)
        context["tmp"] = "/tmp/%s" % hasher.hexdigest()
        # Use temp file to work around lack of -i
        expr = r"""cp -p %(filename)s %(tmp)s \
&& sed -r -e %(script)s %(filename)s > %(tmp)s \
&& cp -p %(filename)s %(filename)s%(backup)s \
&& mv %(tmp)s %(filename)s"""
    else:
        context["extended_regex"] = "-E" if platform == "Darwin" else "-r"
        expr = r"sed -i%(backup)s %(extended_regex)s -e %(script)s %(filename)s"
    command = expr % context
    return func(command, shell=shell)
Example #42
0
def r_library_installer(config):
    """Install R libraries using CRAN and Bioconductor.

    Writes a temporary ``install_packages.R`` script in the current working
    directory, runs it with ``Rscript`` via sudo and removes it afterwards.

    ``config`` is read as a dictionary with these keys:

    - ``cranrepo``: value assigned to the "CRAN" entry of R's ``repos`` option.
    - ``biocrepo``: location handed to R's ``source()``.
    - ``cran``: package names processed with ``install.packages``.
    - ``bioc``: package names processed with ``biocLite``.
    - ``update_packages``: when true (the default), two final
      ``update.packages`` calls are appended to the script.

    A missing or ``None`` ``cran``/``bioc`` entry is treated as an empty list
    of packages instead of raising.
    """
    # An empty key in the configuration may come through as None; normalise
    # both package lists up front so the joins below never see a non-iterable.
    cran_pkgs = config.get("cran") or []
    bioc_pkgs = config.get("bioc") or []
    # Create an Rscript file with install details.
    out_file = "install_packages.R"
    if env.safe_exists(out_file):
        env.safe_run("rm -f %s" % out_file)
    env.safe_run("touch %s" % out_file)
    repo_info = """
    cran.repos <- getOption("repos")
    cran.repos["CRAN" ] <- "%s"
    options(repos=cran.repos)
    source("%s")
    """ % (config["cranrepo"], config["biocrepo"])
    env.safe_append(out_file, repo_info)
    # This template is appended verbatim (no %-formatting), so %in% is
    # written with single percent signs.
    # NOTE(review): R documents update.packages' lib.loc as library
    # directories, while a package name is passed here -- confirm the
    # per-package update does what is intended.
    install_fn = """
    repo.installer <- function(repos, install.fn) {
      update.or.install <- function(pname) {
        if (pname %in% installed.packages())
          update.packages(lib.loc=c(pname), repos=repos, ask=FALSE)
        else
          install.fn(pname)
      }
    }
    """
    env.safe_append(out_file, install_fn)
    std_install = """
    std.pkgs <- c(%s)
    std.installer = repo.installer(cran.repos, install.packages)
    lapply(std.pkgs, std.installer)
    """ % (", ".join('"%s"' % p for p in cran_pkgs))
    env.safe_append(out_file, std_install)
    if bioc_pkgs:
        bioc_install = """
        bioc.pkgs <- c(%s)
        bioc.installer = repo.installer(biocinstallRepos(), biocLite)
        lapply(bioc.pkgs, bioc.installer)
        """ % (", ".join('"%s"' % p for p in bioc_pkgs))
        env.safe_append(out_file, bioc_install)
    if config.get("update_packages", True):
        final_update = """
        update.packages(repos=biocinstallRepos(), ask=FALSE)
        update.packages(ask=FALSE)
        """
        env.safe_append(out_file, final_update)
    # run the script and then get rid of it
    env.safe_sudo("Rscript %s" % out_file)
    env.safe_run("rm -f %s" % out_file)
Example #43
0
def _download_dbnsfp(env, gid, gconfig):
    """Download and prepare dbNSFP functional prediction resources if configured.

    Feeds into VEP for annotating VCF files:
    https://sites.google.com/site/jpopgen/dbNSFP
    https://github.com/ensembl-variation/VEP_plugins/blob/master/dbNSFP.pm

    Nothing happens unless ``gconfig`` has a true ``dbnsfp`` entry. For
    GRCh37 the archive is fetched, merged into one bgzipped file and tabix
    indexed; for hg19 both files are symlinked from the GRCh37 directory.
    Steps whose output already exists are skipped.
    """
    if not gconfig.get("dbnsfp"):
        return
    version = "2.6"
    bgz_name = "dbNSFP_v%s.gz" % (version)
    tbi_name = bgz_name + ".tbi"
    if gid == "GRCh37":
        # download and prepare bgzipped output file
        if not env.safe_exists(bgz_name):
            url = "http://dbnsfp.houstonbioinformatics.org/dbNSFPzip/dbNSFPv%s.zip" % version
            archive = shared._remote_fetch(env, url, samedir=True)
            unpack_dir = "dbNSFPv%s" % version
            steps = (
                "mkdir -p %s" % unpack_dir,
                "unzip %s -d %s" % (archive, unpack_dir),
                "cat %s/dbNSFP*_variant.chr* | bgzip -c > %s" % (unpack_dir, bgz_name),
                "rm -f %s/* && rmdir %s" % (unpack_dir, unpack_dir),
                "rm -f %s" % (archive),
            )
            for step in steps:
                env.safe_run(step)
        if not env.safe_exists(tbi_name):
            env.safe_run("tabix -s 1 -b 2 -e 2 -c '#' %s" % bgz_name)
    elif gid == "hg19":
        # symlink to GRCh37 download
        for target in (bgz_name, tbi_name):
            if not env.safe_exists(target):
                env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (target, target))
Example #44
0
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Prepare one Broad bundle VCF as a bgzipped, tabix-indexed local file.

    The local name is the bundle file name without the genome id and without
    any ".sites" marker. An existing uncompressed copy is compressed and
    indexed; if the compressed file is still missing it is fetched, and a
    fetch that returns nothing only logs a warning. Leftover ``.vcf`` and
    ``.vcf.idx`` files are removed. Returns the local ``.vcf.gz`` name.
    """
    remote_name = "{0}.{1}.vcf{2}".format(name, gid, ext)
    gid_marker = ".{0}".format(gid)
    fname = remote_name.replace(gid_marker, "").replace(".sites", "") + ".gz"
    url_template = ("ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/"
                    "{bundle}/{gid}/{fname}.gz")
    base_url = url_template.format(bundle=bundle_version, gid=gid, fname=remote_name)
    plain_vcf = fname.replace(".vcf.gz", ".vcf")
    index_cmd = "tabix -f -p vcf %s" % fname
    # compress and prepare existing uncompressed versions
    if env.safe_exists(plain_vcf):
        env.safe_run("bgzip %s" % plain_vcf)
        env.safe_run(index_cmd)
    # otherwise, download and bgzip and tabix index
    if not env.safe_exists(fname):
        fetched = shared._remote_fetch(env, base_url, allow_fail=True)
        if not fetched:
            env.logger.warn("dbSNP resources not available for %s" % gid)
        else:
            env.safe_run("gunzip -c %s | bgzip -c > %s" % (fetched, fname))
            env.safe_run(index_cmd)
            env.safe_run("rm -f %s" % fetched)
    # clean up old files
    for stale in (plain_vcf, fname.replace(".vcf.gz", ".vcf.idx")):
        if env.safe_exists(stale):
            env.safe_run("rm -f %s" % (stale))
    return fname
Example #45
0
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Make a Broad bundle VCF available locally, bgzipped and tabix-indexed.

    Works in three stages: compress and index an uncompressed local copy if
    one exists, otherwise fetch the bundle file (logging a warning when the
    fetch returns nothing), and finally delete leftover uncompressed files.
    The returned name has the genome id and ".sites" stripped and ends in
    ".gz".
    """
    bundle_file = "{name}.{gid}.vcf{ext}".format(name=name, gid=gid, ext=ext)
    without_gid = bundle_file.replace(".{0}".format(gid), "")
    fname = without_gid.replace(".sites", "") + ".gz"
    base_url = "".join([
        "ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/",
        "{bundle}/{gid}/{fname}.gz".format(
            bundle=bundle_version, fname=bundle_file, gid=gid),
    ])

    def _variant(suffix):
        # Same file name with the ".vcf.gz" part swapped for another suffix.
        return fname.replace(".vcf.gz", suffix)

    # compress and prepare existing uncompressed versions
    if env.safe_exists(_variant(".vcf")):
        env.safe_run("bgzip %s" % _variant(".vcf"))
        env.safe_run("tabix -f -p vcf %s" % fname)
    # otherwise, download and bgzip and tabix index
    if not env.safe_exists(fname):
        download = shared._remote_fetch(env, base_url, allow_fail=True)
        if download:
            for cmd in ("gunzip -c %s | bgzip -c > %s" % (download, fname),
                        "tabix -f -p vcf %s" % fname,
                        "rm -f %s" % download):
                env.safe_run(cmd)
        else:
            env.logger.warn("dbSNP resources not available for %s" % gid)
    # clean up old files
    for old_suffix in [".vcf", ".vcf.idx"]:
        if env.safe_exists(_variant(old_suffix)):
            env.safe_run("rm -f %s" % (_variant(old_suffix)))
    return fname
Example #46
0
def _download_dbnsfp(env, gid, gconfig):
    """Download and prepare dbNSFP functional prediction resources if configured.

    Feeds into VEP for annotating VCF files:
    https://sites.google.com/site/jpopgen/dbNSFP
    https://github.com/ensembl-variation/VEP_plugins/blob/master/dbNSFP.pm

    Does nothing when ``gconfig`` has no true ``dbnsfp`` entry. GRCh37 gets
    the downloaded, bgzipped and tabix-indexed file; hg19 gets symlinks to
    the GRCh37 files. Existing outputs are left alone.
    """
    wanted = gconfig.get("dbnsfp")
    if not wanted:
        return None
    version = "2.5"
    outfile = "dbNSFP_v%s.gz" % (version)
    index = outfile + ".tbi"

    if gid == "hg19":
        # symlink to GRCh37 download
        if not env.safe_exists(outfile):
            env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (outfile, outfile))
        if not env.safe_exists(index):
            env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (index, index))
        return None

    if gid != "GRCh37":
        return None

    # download and prepare bgzipped output file
    if not env.safe_exists(outfile):
        url = "http://dbnsfp.houstonbioinformatics.org/dbNSFPzip/dbNSFPv%s.zip" % version
        fetched_zip = shared._remote_fetch(env, url, samedir=True)
        workdir = "dbNSFPv%s" % version
        env.safe_run("mkdir -p %s" % workdir)
        env.safe_run("unzip %s -d %s" % (fetched_zip, workdir))
        env.safe_run("cat %s/dbNSFP*_variant.chr* | bgzip -c > %s" % (workdir, outfile))
        env.safe_run("rm -f %s/* && rmdir %s" % (workdir, workdir))
        env.safe_run("rm -f %s" % (fetched_zip))
    if not env.safe_exists(index):
        env.safe_run("tabix -s 1 -b 2 -e 2 -c '#' %s" % outfile)
    return None
Example #47
0
 def _bgzip_file(env, orig_file):
     """Re-compress ``orig_file`` with bgzip and return the output file name.

     Pipes ``zcat`` output for ``orig_file`` through ``bgzip -c`` using
     ``env.safe_run``.
     """
     # NOTE(review): out_file is never assigned inside this function as shown;
     # presumably the line defining it was elided from this snippet -- confirm.
     env.safe_run("zcat %s | bgzip -c > %s" % (orig_file, out_file))
     return out_file
Example #48
0
def _download_qsignature(env, gid, gconfig):
    """Download qsignature position file to detect samples problems

    For GRCh37 the archive is fetched and its ``qsignature_positions.txt`` is
    kept as ``qsignature.vcf``; for hg19 that same file is symlinked from the
    GRCh37 directory. Other genome ids are ignored, and nothing is done when
    the output already exists.

    :param env: object providing ``safe_exists`` and ``safe_run``
    :param gid: str genome id
    :param gconfig: not used by this function

    :returns: None
    """
    base_url = "http://downloads.sourceforge.net/project/adamajava/qsignature.tar.bz2"
    # Both genome builds end up with the same prepared file name.
    outfile = "qsignature.vcf"
    if gid == "GRCh37":
        if not env.safe_exists(outfile):
            zipfile = shared._remote_fetch(env, base_url, samedir=True)
            outdir = "qsignature"
            env.safe_run("mkdir -p %s" % outdir)
            env.safe_run("tar -jxf %s -C %s" % (zipfile, outdir))
            env.safe_run("mv %s/qsignature_positions.txt %s" %
                         (outdir, outfile))
            env.safe_run("rm -rf %s" % outdir)
            env.safe_run("rm -rf %s" % zipfile)
    elif gid == "hg19":  # symlink to GRCh37 download
        # Link the prepared positions file: the downloaded archive is removed
        # by the GRCh37 branch, so linking it by name would leave a dangling
        # symlink.
        if not env.safe_exists(outfile):
            env.safe_run("ln -sf ../../GRCh37/variation/%s %s" %
                         (outfile, outfile))
Example #49
0
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Prepare one Broad bundle VCF as a bgzipped, tabix-indexed local file.

    An existing uncompressed local copy is compressed and indexed; if the
    compressed file is still missing afterwards it is fetched and
    re-compressed with bgzip. Leftover ``.vcf`` and ``.vcf.idx`` files are
    removed. Returns the local ``.vcf.gz`` file name.
    """
    # Broad bundle directories have uneven use of ".sites" in VCF files
    # only present in hg19 for non-dbSNP resources
    if gid == "hg19" and not name.startswith("dbsnp"):
        sites = ".sites"
    else:
        sites = ""
    remote_name = "{0}.{1}{2}.vcf{3}".format(name, gid, sites, ext)
    stripped = remote_name.replace(".{0}".format(gid), "")
    fname = stripped.replace(".sites", "") + ".gz"
    url_template = ("ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/"
                    "{bundle}/{gid}/{fname}.gz")
    base_url = url_template.format(bundle=bundle_version, gid=gid, fname=remote_name)
    plain_vcf = fname.replace(".vcf.gz", ".vcf")
    index_cmd = "tabix -f -p vcf %s" % fname
    # compress and prepare existing uncompressed versions
    if env.safe_exists(plain_vcf):
        env.safe_run("bgzip %s" % plain_vcf)
        env.safe_run(index_cmd)
    # otherwise, download and bgzip and tabix index
    if not env.safe_exists(fname):
        fetched = shared._remote_fetch(env, base_url)
        env.safe_run("gunzip -c %s | bgzip -c > %s" % (fetched, fname))
        env.safe_run(index_cmd)
        env.safe_run("rm -f %s" % fetched)
    # clean up old files
    for stale in (plain_vcf, fname.replace(".vcf.gz", ".vcf.idx")):
        if env.safe_exists(stale):
            env.safe_run("rm -f %s" % (stale))
    return fname
Example #50
0
def _download_broad_bundle(gid, bundle_version, name, ext):
    """Make a Broad bundle VCF available locally, bgzipped and tabix-indexed.

    Compresses and indexes an uncompressed local copy when one exists,
    fetches and re-compresses the bundle file when the compressed file is
    still missing, then deletes leftover uncompressed files. The returned
    name has the genome id and ".sites" stripped and ends in ".gz".
    """
    # Broad bundle directories have uneven use of ".sites" in VCF files
    # only present in hg19 for non-dbSNP resources
    needs_sites = gid == "hg19" and not name.startswith("dbsnp")
    sites = ".sites" if needs_sites else ""
    bundle_file = "{name}.{gid}{sites}.vcf{ext}".format(
        name=name, gid=gid, sites=sites, ext=ext)
    fname = bundle_file.replace(".{0}".format(gid), "").replace(".sites", "") + ".gz"
    base_url = "".join([
        "ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/",
        "{bundle}/{gid}/{fname}.gz".format(
            bundle=bundle_version, fname=bundle_file, gid=gid),
    ])

    def _variant(suffix):
        # Same file name with the ".vcf.gz" part swapped for another suffix.
        return fname.replace(".vcf.gz", suffix)

    # compress and prepare existing uncompressed versions
    if env.safe_exists(_variant(".vcf")):
        env.safe_run("bgzip %s" % _variant(".vcf"))
        env.safe_run("tabix -f -p vcf %s" % fname)
    # otherwise, download and bgzip and tabix index
    if not env.safe_exists(fname):
        download = shared._remote_fetch(env, base_url)
        for cmd in ("gunzip -c %s | bgzip -c > %s" % (download, fname),
                    "tabix -f -p vcf %s" % fname,
                    "rm -f %s" % download):
            env.safe_run(cmd)
    # clean up old files
    for old_suffix in [".vcf", ".vcf.idx"]:
        if env.safe_exists(_variant(old_suffix)):
            env.safe_run("rm -f %s" % (_variant(old_suffix)))
    return fname
Example #51
0
def _download_qsignature(env, gid, gconfig):
    """Download qsignature position file to detect samples problems

    GRCh37 fetches the archive and keeps its ``qsignature_positions.txt`` as
    ``qsignature.vcf``; hg19 symlinks that file from the GRCh37 directory.
    Any other genome id is a no-op, as is an output that already exists.

    :param env: object providing ``safe_exists`` and ``safe_run``
    :param gid: str genome id
    :param gconfig: not used by this function

    :returns: None
    """
    base_url = "http://downloads.sourceforge.net/project/adamajava/qsignature.tar.bz2"
    # The prepared file has the same name for both genome builds.
    outfile = "qsignature.vcf"
    if gid == "GRCh37":
        if not env.safe_exists(outfile):
            zipfile = shared._remote_fetch(env, base_url, samedir=True)
            outdir = "qsignature"
            env.safe_run("mkdir -p %s" % outdir)
            env.safe_run("tar -jxf %s -C %s" % (zipfile, outdir))
            env.safe_run("mv %s/qsignature_positions.txt %s" % (outdir, outfile))
            env.safe_run("rm -rf %s" % outdir)
            env.safe_run("rm -rf %s" % zipfile)
    elif gid == "hg19":  # symlink to GRCh37 download
        # Point at the prepared positions file rather than the archive name:
        # the GRCh37 branch deletes the archive after extracting it.
        if not env.safe_exists(outfile):
            env.safe_run("ln -sf ../../GRCh37/variation/%s %s" % (outfile, outfile))