Beispiel #1
0
def test_ncbi_human():
    """Test NCBI.

    Download the human genome (GRCh38.p9) from NCBI into a temporary
    directory and check a 15-base slice of chromosome "6".
    """
    tmp = mkdtemp()
    try:
        genomepy.install_genome("GRCh38.p9", "NCBI", genome_dir=tmp)
        g = genomepy.Genome("GRCh38.p9", genome_dir=tmp)
        seq = g["6"][166168664:166168679]
        assert str(seq) == "CCTCCTCGCTCTCTT"
    finally:
        # Remove the download even when the install or the assertion fails,
        # so a failing run does not leave the genome behind.
        shutil.rmtree(tmp)
Beispiel #2
0
def test_ucsc_human():
    """Test UCSC.

    Download the human genome (hg38) from UCSC into a temporary
    directory and check a 15-base slice of "chr6".
    """
    tmp = mkdtemp()
    try:
        genomepy.install_genome("hg38", "UCSC", genome_dir=tmp)
        g = genomepy.Genome("hg38", genome_dir=tmp)
        seq = g["chr6"][166168664:166168679]
        assert str(seq) == "CCTCCTCGCTCTCTT"
    finally:
        # Remove the download even when the install or the assertion fails,
        # so a failing run does not leave the genome behind.
        shutil.rmtree(tmp)
Beispiel #3
0
def test_ensembl_human():
    """Test Ensembl.

    Download the human genome (GRCh38.p10) from Ensembl into a temporary
    directory and check a 15-base slice of chromosome "6".
    """
    tmp = mkdtemp()
    try:
        genomepy.install_genome("GRCh38.p10", "Ensembl", genome_dir=tmp)
        g = genomepy.Genome("GRCh38.p10", genome_dir=tmp)
        seq = g["6"][166168664:166168679]
        assert str(seq) == "CCTCCTCGCTCTCTT"
    finally:
        # Remove the download even when the install or the assertion fails,
        # so a failing run does not leave the genome behind.
        shutil.rmtree(tmp)
Beispiel #4
0
def test_ensembl_genome():
    """Test Ensembl.

    Download the Drosophila genome (BDGP6) from Ensembl into a temporary
    directory and check a 35-base slice of "3L".
    """
    tmp = mkdtemp()
    try:
        genomepy.install_genome("BDGP6", "Ensembl", genome_dir=tmp)
        g = genomepy.Genome("BDGP6", genome_dir=tmp)
        seq = g["3L"][10637840:10637875]
        # Compare case-insensitively: the expected string is upper case.
        assert str(seq).upper() == "TTTGCAACAGCTGCCGCAGTGTGACCGTTGTACTG"
    finally:
        # Remove the download even when the install or the assertion fails,
        # so a failing run does not leave the genome behind.
        shutil.rmtree(tmp)
Beispiel #5
0
def test_ncbi_genome():
    """Test NCBI.

    Download the Drosophila genome ("Release 6 plus ISO1 MT") from NCBI
    into a temporary directory and check a 35-base slice of "3L".
    """
    tmp = mkdtemp()
    try:
        genomepy.install_genome(
            "Release 6 plus ISO1 MT", "NCBI", genome_dir=tmp
        )
        # The genome is loaded under the underscore form of its name.
        # NOTE(review): presumably genomepy replaces spaces when deriving
        # the local name - confirm against genomepy.
        g = genomepy.Genome("Release_6_plus_ISO1_MT", genome_dir=tmp)
        seq = g["3L"][10637840:10637875]
        # Compare case-insensitively: the expected string is upper case.
        assert str(seq).upper() == "TTTGCAACAGCTGCCGCAGTGTGACCGTTGTACTG"
    finally:
        # Remove the download even when the install or the assertion fails,
        # so a failing run does not leave the genome behind.
        shutil.rmtree(tmp)
Beispiel #6
0
def test_ucsc_genome():
    """Test UCSC.

    Download the S. cerevisiae genome (sacCer3) from UCSC into a temporary
    directory and check a 20-base slice of "chrIV".
    """
    tmp = mkdtemp()
    try:
        genomepy.install_genome("sacCer3", "UCSC", genome_dir=tmp)
        g = genomepy.Genome("sacCer3", genome_dir=tmp)
        seq = g["chrIV"][1337000:1337020]
        assert str(seq) == "TTTGGTTGTTCCTCTTCCTT"
    finally:
        # Remove the download even when the install or the assertion fails,
        # so a failing run does not leave the genome behind.
        shutil.rmtree(tmp)
Beispiel #7
0
def test_install_annotation_options(force,
                                    localname,
                                    annotation=True,
                                    genome="ASM14646v1",
                                    provider="NCBI"):
    """Test force and localname with annotations.

    Installs the annotation twice into a temporary directory and uses the
    modification time of the GTF file to tell whether the second install
    rewrote it.

    Args:
        force: the string "no-overwrite" disables forcing on the second
            install; any other value enables it.
        localname: the string "original_name" keeps the genome's own name
            (localname=None); any other value installs as "My_localname".
        annotation: forwarded to ``genomepy.install_genome``.
        genome: name of the genome to install.
        provider: provider to install from.
    """
    tmp = mkdtemp()
    try:
        force = force != "no-overwrite"
        localname = None if localname == "original_name" else "My_localname"

        # create dummy fasta to skip download_genome step
        name = genomepy.utils.get_localname(genome, localname)
        path = os.path.join(tmp, name, name + ".fa")
        os.mkdir(os.path.dirname(path))
        with open(path, "w") as f:
            f.write(">Chr1\nAAAACCCCTTTTGGGG\n")

        # First install never forces: it only has to produce the files.
        genomepy.install_genome(
            genome,
            provider,
            genome_dir=tmp,
            localname=localname,
            annotation=annotation,
            force=False,
        )

        gtf = os.path.join(tmp, name, name + ".annotation.gtf.gz")
        validate_gzipped_gtf(gtf)

        bed = os.path.join(tmp, name, name + ".annotation.bed.gz")
        validate_gzipped_bed(bed)

        # force test
        t0 = os.path.getmtime(gtf)
        # OSX rounds down getmtime to the second
        if system() != "Linux":
            sleep(1)
        genomepy.install_genome(
            genome,
            provider,
            genome_dir=tmp,
            localname=localname,
            annotation=annotation,
            force=force,
        )

        t1 = os.path.getmtime(gtf)
        # The file must have been rewritten if and only if force was set.
        assert (t0 != t1) if force else (t0 == t1)
    finally:
        # Remove the temporary directory even when a step above fails.
        shutil.rmtree(tmp)
Beispiel #8
0
def test_url_genome():
    """Test URL.

    Download the C. elegans genome (ce11) directly from a UCSC url, install
    it under the local name "url_test" and check the first 12 bases of
    "chrI".
    """
    tmp = mkdtemp()
    try:
        genomepy.install_genome(
            "http://hgdownload.soe.ucsc.edu/goldenPath/ce11/bigZips/chromFa.tar.gz",
            "url",
            genome_dir=tmp,
            localname="url_test",
        )
        g = genomepy.Genome("url_test", genome_dir=tmp)
        # Compare case-insensitively: the expected string is lower case.
        assert str(g["chrI"][:12]).lower() == "gcctaagcctaa"
    finally:
        # Remove the download even when the install or the assertion fails,
        # so a failing run does not leave the genome behind.
        shutil.rmtree(tmp)
 def __call__(self, parser, args, name, option_string=None):
     """Load the genome called ``name`` and store it on ``args``.

     ``genomes_dir`` and ``auto_install`` are not parameters; they are
     read from the enclosing scope. If the genome cannot be found it is
     installed (with annotation) when ``auto_install`` is truthy;
     otherwise the process exits with status 1.
     """
     try:
         loaded = Genome(name, genomes_dir=genomes_dir)
     except FileNotFoundError:
         logger.warning(f"Genome {name} not found!")
         if not auto_install:
             # Nothing more can be done without permission to install.
             logger.info("You can install it using `genomepy install`.")
             sys.exit(1)
         logger.info(
             "Trying to install it automatically using genomepy...")
         install_genome(name,
                        annotation=True,
                        genomes_dir=genomes_dir)
         # Second attempt, now that the genome has been installed.
         loaded = Genome(name, genomes_dir=genomes_dir)
     setattr(args, self.dest, loaded)
Beispiel #10
0
def install(
    name,
    provider,
    genomes_dir,
    localname,
    mask,
    keep_alt,
    regex,
    invert_match,
    bgzip,
    annotation,
    only_annotation,
    skip_matching,
    skip_filter,
    threads,
    force,
    **kwargs,
):
    """
    Install a genome & run active plugins.

    NAME (and more) can be obtained from genomepy search.
    """
    # Thin pass-through: every named option is forwarded unchanged under
    # the same keyword. Any extra keyword arguments are forwarded too.
    options = {
        "provider": provider,
        "genomes_dir": genomes_dir,
        "localname": localname,
        "mask": mask,
        "keep_alt": keep_alt,
        "regex": regex,
        "invert_match": invert_match,
        "bgzip": bgzip,
        "annotation": annotation,
        "only_annotation": only_annotation,
        "skip_matching": skip_matching,
        "skip_filter": skip_filter,
        "threads": threads,
        "force": force,
    }
    # Two separate unpackings (not a merged dict) so that a duplicate key
    # in kwargs still raises TypeError instead of silently overriding.
    genomepy.install_genome(name, **options, **kwargs)
Beispiel #11
0
    def test_install_genome_options(
        force, localname, bgzip, genome="ASM2732v1", provider="NCBI"
    ):
        """Test force, localname and bgzip.

        Installs the genome twice into a temporary directory and uses the
        modification time of the fasta file to tell whether the second
        install rewrote it.

        Args:
            force: the string "no-overwrite" disables forcing on the second
                install; any other value enables it.
            localname: the string "original_name" keeps the genome's own
                name (localname=None); any other value installs as
                "My_localname".
            bgzip: the string "unzipped" installs a plain ".fa"; any other
                value installs a compressed ".fa.gz".
            genome: name of the genome to install.
            provider: provider to install from.
        """
        tmp = mkdtemp()
        try:
            force = force != "no-overwrite"
            localname = None if localname == "original_name" else "My_localname"
            bgzip = bgzip != "unzipped"

            # First install never forces: it only has to produce the file.
            genomepy.install_genome(
                genome,
                provider,
                genomes_dir=tmp,
                localname=localname,
                bgzip=bgzip,
                force=False,
            )

            # force test
            ext = ".fa.gz" if bgzip else ".fa"
            name = genomepy.utils.get_localname(genome, localname)
            path = os.path.join(tmp, name, name + ext)

            t0 = os.path.getmtime(path)
            # OSX rounds down getmtime to the second
            if system() != "Linux":
                sleep(1)
            genomepy.install_genome(
                genome,
                provider,
                genomes_dir=tmp,
                localname=localname,
                bgzip=bgzip,
                force=force,
            )

            t1 = os.path.getmtime(path)
            # The file must have been rewritten if and only if force was set.
            assert (t0 != t1) if force else (t0 == t1)
        finally:
            # Remove the temporary directory even when a step above fails.
            shutil.rmtree(tmp)
Beispiel #12
0
    def test_install_genome_options(force,
                                    localname,
                                    genome="ASM2732v1",
                                    provider="NCBI"):
        """Test force and localname.

        Installs the genome twice into a temporary directory and uses the
        modification time of the fasta file to tell whether the second
        install rewrote it.

        Args:
            force: the string "no-overwrite" disables forcing on the second
                install; any other value enables it.
            localname: the string "original_name" keeps the genome's own
                name (localname=None); any other value installs as
                "My_localname".
            genome: name of the genome to install.
            provider: provider to install from.
        """
        tmp = mkdtemp()
        try:
            force = force != "no-overwrite"
            localname = None if localname == "original_name" else "My_localname"

            # First install never forces: it only has to produce the file.
            genomepy.install_genome(
                genome,
                provider,
                genomes_dir=tmp,
                localname=localname,
                force=False,
            )
            sleep(1)

            # force test
            name = genomepy.utils.get_localname(genome, localname)
            path = os.path.join(tmp, name, name + ".fa")

            t0 = os.path.getmtime(path)
            # OSX rounds down getmtime to the second
            if not linux:
                sleep(1)
            genomepy.install_genome(
                genome,
                provider,
                genomes_dir=tmp,
                localname=localname,
                force=force,
            )
            sleep(1)

            t1 = os.path.getmtime(path)
            # The file must have been rewritten if and only if force was set.
            assert (t0 != t1) if force else (t0 == t1)
        finally:
            # Remove the temporary directory even when a step above fails.
            genomepy.utils.rm_rf(tmp)
        # list user plugins
        active_plugins = genomepy.config.config.get("plugin", [])
        if active_plugins:
            print("Deactivating user plugins")
            genomepy.manage_plugins("disable", active_plugins)

        # select user specified provider
        if provider is None:
            # select a provider with the annotation
            provider = providers[assembly]["annotation"]

        try:
            genomepy.install_genome(
                name=assembly,
                provider=provider,
                genomes_dir=genomes_dir,
                only_annotation=True,
                force=True,
            )

        except Exception as e:
            print(e)
            print(
                "\nSomething went wrong while downloading the gene annotation (see error message above). "
                "When this happens it is almost always because we had troubles connecting to the"
                "servers hosting the genome assemblies. Usually this is resolved by just running seq2science"
                "again, either immediately or in a couple hours.\n\n"
                "If the problem persists you could try running `seq2science clean` and see if that resolves the "
                "issue.")

        finally:
Beispiel #14
0
def install(name, provider, genome_dir, localname, mask, regex, match, annotation):
    """Install genome NAME from provider PROVIDER in directory GENOME_DIR."""
    # Callers pass a positive ``match`` flag; install_genome takes the
    # negated form as ``invert_match``.
    invert_match = not match
    genomepy.install_genome(
        name,
        provider,
        genome_dir=genome_dir,
        localname=localname,
        mask=mask,
        regex=regex,
        invert_match=invert_match,
        annotation=annotation,
    )
Beispiel #15
0
def install(name, provider, genome_dir, localname, mask, regex, match, annotation):
    """Install genome NAME from provider PROVIDER in directory GENOME_DIR."""
    # Callers pass a positive ``match`` flag; install_genome takes the
    # negated form as ``invert_match``.
    genomepy.install_genome(
        name,
        provider,
        # Passed by keyword so it cannot bind to a different third
        # positional parameter of install_genome.
        genome_dir=genome_dir,
        localname=localname,
        mask=mask,
        regex=regex,
        invert_match=not match,
        annotation=annotation,
    )
        active_plugins = genomepy.config.config.get("plugin", [])
        if active_plugins:
            print("Deactivating user plugins")
            genomepy.manage_plugins("disable", active_plugins)

        # select user specified provider
        if provider is None:
            # select a provider with the annotation, if possible
            a = providers[assembly]["annotation"]
            g = providers[assembly]["genome"]
            provider = g if a is None else a

        try:
            genomepy.install_genome(
                name=assembly,
                provider=provider,
                genomes_dir=genomes_dir,
                force=True,
            )

            # delete the support files
            # (we recreate these separately to make the output simple for snakemake and prevent redownloading)
            genomepy.files.rm_rf(f"{output}.fai")
            genomepy.files.rm_rf(f"{output}.sizes")
            genomepy.files.rm_rf(f"{output[:-2]}gaps.bed")
        except Exception as e:
            print(e)
            print("\nSomething went wrong while downloading the genome (see error message above). "
                  "When this happens it is almost always because we had troubles connecting to the "
                  "servers hosting the genome assemblies. Usually this is resolved by just running seq2science "
                  "again, either immediately or in a couple hours.\n\n"
                  "If the problem persists you could try running `seq2science clean` and see if that resolves the "