def _upgrade_snpeff_data(galaxy_dir, args, remotes):
    """Install or upgrade snpEff databases, localized to reference directory.

    For each genome build from ``genome.get_builds`` that has a
    ``<dbkey>-resources.yaml`` file next to its reference file, the snpEff
    database name and base directory are looked up with ``effects.get_db``.
    An existing database directory that ``_is_old_database`` flags is removed,
    and a missing database is downloaded with ``wget`` and unpacked with
    ``unzip``. When ``args.cwl`` is set the database directory is also passed
    to ``create.directory_tarball``.

    Does nothing when ``effects.snpeff_version`` returns a falsy value.

    Args:
        galaxy_dir: Passed to ``genome.get_builds`` to enumerate the builds.
        args: Options object; passed to ``effects.snpeff_version`` and
            ``_is_old_database`` and read for its ``cwl`` attribute.
        remotes: Mapping holding a ``"snpeff_dl_url"`` format template with
            ``snpeff_ver`` and ``genome`` fields.

    Raises:
        subprocess.CalledProcessError: If ``wget`` or ``unzip`` exits non-zero.
    """
    snpeff_version = effects.snpeff_version(args)
    if not snpeff_version:
        return
    for dbkey, ref_file in genome.get_builds(galaxy_dir):
        resource_file = os.path.join(os.path.dirname(ref_file), "%s-resources.yaml" % dbkey)
        if not os.path.exists(resource_file):
            continue
        with open(resource_file) as in_handle:
            # safe_load: calling yaml.load() without a Loader is deprecated in
            # PyYAML 5.1+ and a TypeError in PyYAML 6.
            resources = yaml.safe_load(in_handle)
        snpeff_db, snpeff_base_dir = effects.get_db({"genome_resources": resources,
                                                     "reference": {"fasta": {"base": ref_file}}})
        if not snpeff_db:
            continue
        snpeff_db_dir = os.path.join(snpeff_base_dir, snpeff_db)
        # Drop an outdated database so the check below triggers a fresh install.
        if os.path.exists(snpeff_db_dir) and _is_old_database(snpeff_db_dir, args):
            shutil.rmtree(snpeff_db_dir)
        if not os.path.exists(snpeff_db_dir):
            print("Installing snpEff database %s in %s" % (snpeff_db, snpeff_base_dir))
            # The URL template expects the version with underscores instead of dots.
            dl_url = remotes["snpeff_dl_url"].format(
                snpeff_ver=snpeff_version.replace(".", "_"),
                genome=snpeff_db)
            dl_file = os.path.basename(dl_url)
            with utils.chdir(snpeff_base_dir):
                # NOTE(review): --no-check-certificate disables TLS certificate
                # verification for this download; confirm it is still required.
                subprocess.check_call(["wget", "--no-check-certificate", "-c", "-O", dl_file, dl_url])
                subprocess.check_call(["unzip", dl_file])
                os.remove(dl_file)
            # The unpacked database is expected under data/<db>; move it up
            # one level and remove the then-empty "data" directory.
            dl_dir = os.path.join(snpeff_base_dir, "data", snpeff_db)
            shutil.move(dl_dir, snpeff_db_dir)
            os.rmdir(os.path.join(snpeff_base_dir, "data"))
        if args.cwl:
            create.directory_tarball(snpeff_db_dir)
def _prepare_cwl_tarballs(data_dir):
    """Create CWL ready tarballs for complex directories.

    Avoids need for CWL runners to pass and serialize complex directories
    of files, which is inconsistent between runners.

    Every directory matching ``<data_dir>/genomes/*/*`` is scanned; each name
    in ``TARBALL_DIRECTORIES`` that exists there as a directory is passed to
    ``create.directory_tarball``.

    Args:
        data_dir: Base directory containing the ``genomes`` tree.
    """
    for dbref_dir in filter(os.path.isdir, glob.glob(os.path.join(data_dir, "genomes", "*", "*"))):
        for indexdir in TARBALL_DIRECTORIES:
            cur_target = os.path.join(dbref_dir, indexdir)
            if not os.path.isdir(cur_target):
                continue
            # Some indices, like rtg, have a single nested directory; in that
            # case the nested directory is the one that gets tarred.
            subdirs = [x for x in os.listdir(cur_target) if os.path.isdir(os.path.join(cur_target, x))]
            if len(subdirs) == 1:
                cur_target = os.path.join(cur_target, subdirs[0])
            create.directory_tarball(cur_target)
Exemple #3
0
def _upgrade_snpeff_data(galaxy_dir, args, remotes):
    """Install or upgrade snpEff databases, localized to reference directory.

    For each genome build from ``genome.get_builds`` that has a
    ``<dbkey>-resources.yaml`` file next to its reference file, the snpEff
    database name and base directory are looked up with ``effects.get_db``.
    An existing database directory that ``_is_old_database`` flags is
    removed, and a missing database is downloaded with ``wget`` and unpacked
    with ``unzip``. When ``args.cwl`` is set the database directory is also
    passed to ``create.directory_tarball``.

    Does nothing when ``effects.snpeff_version`` returns a falsy value.

    Args:
        galaxy_dir: Passed to ``genome.get_builds`` to enumerate the builds.
        args: Options object; passed to ``effects.snpeff_version`` and
            ``_is_old_database`` and read for its ``cwl`` attribute.
        remotes: Mapping holding a ``"snpeff_dl_url"`` format template with
            ``snpeff_ver`` and ``genome`` fields.

    Raises:
        subprocess.CalledProcessError: If ``wget`` or ``unzip`` exits
            non-zero.
    """
    snpeff_version = effects.snpeff_version(args)
    if not snpeff_version:
        return
    for dbkey, ref_file in genome.get_builds(galaxy_dir):
        resource_file = os.path.join(os.path.dirname(ref_file),
                                     "%s-resources.yaml" % dbkey)
        if not os.path.exists(resource_file):
            continue
        with open(resource_file) as in_handle:
            # safe_load: calling yaml.load() without a Loader is deprecated
            # in PyYAML 5.1+ and a TypeError in PyYAML 6.
            resources = yaml.safe_load(in_handle)
        snpeff_db, snpeff_base_dir = effects.get_db({
            "genome_resources": resources,
            "reference": {
                "fasta": {
                    "base": ref_file
                }
            }
        })
        if not snpeff_db:
            continue
        snpeff_db_dir = os.path.join(snpeff_base_dir, snpeff_db)
        # Drop an outdated database so the check below triggers a reinstall.
        if os.path.exists(snpeff_db_dir) and _is_old_database(
                snpeff_db_dir, args):
            shutil.rmtree(snpeff_db_dir)
        if not os.path.exists(snpeff_db_dir):
            print("Installing snpEff database %s in %s" %
                  (snpeff_db, snpeff_base_dir))
            # The URL template expects the version with underscores, not dots.
            dl_url = remotes["snpeff_dl_url"].format(
                snpeff_ver=snpeff_version.replace(".", "_"),
                genome=snpeff_db)
            dl_file = os.path.basename(dl_url)
            with utils.chdir(snpeff_base_dir):
                # NOTE(review): --no-check-certificate disables TLS
                # certificate verification; confirm it is still required.
                subprocess.check_call([
                    "wget", "--no-check-certificate", "-c", "-O",
                    dl_file, dl_url
                ])
                subprocess.check_call(["unzip", dl_file])
                os.remove(dl_file)
            # The unpacked database is expected under data/<db>; move it up
            # one level and remove the then-empty "data" directory.
            dl_dir = os.path.join(snpeff_base_dir, "data", snpeff_db)
            shutil.move(dl_dir, snpeff_db_dir)
            os.rmdir(os.path.join(snpeff_base_dir, "data"))
        if args.cwl:
            create.directory_tarball(snpeff_db_dir)
def _prepare_cwl_tarballs(data_dir):
    """Create CWL ready tarballs for complex directories.

    Avoids need for CWL runners to pass and serialize complex directories
    of files, which is inconsistent between runners.

    Every directory matching ``<data_dir>/genomes/*/*`` is scanned; each
    name in ``TARBALL_DIRECTORIES`` that exists there as a directory is
    passed to ``create.directory_tarball``.

    Args:
        data_dir: Base directory containing the ``genomes`` tree.
    """
    for dbref_dir in filter(
            os.path.isdir,
            glob.glob(os.path.join(data_dir, "genomes", "*", "*"))):
        for indexdir in TARBALL_DIRECTORIES:
            cur_target = os.path.join(dbref_dir, indexdir)
            if not os.path.isdir(cur_target):
                continue
            # Some indices, like rtg, have a single nested directory; in
            # that case the nested directory is the one that gets tarred.
            subdirs = [
                x for x in os.listdir(cur_target)
                if os.path.isdir(os.path.join(cur_target, x))
            ]
            if len(subdirs) == 1:
                cur_target = os.path.join(cur_target, subdirs[0])
            create.directory_tarball(cur_target)