예제 #1
0
파일: star.py 프로젝트: siebrenf/genomepy
    def after_genome_download(self, genome, force=False):
        """Build a STAR index for ``genome`` unless one already exists.

        Returns without doing anything when ``cmd_ok("STAR")`` is falsy
        (presumably: the STAR executable is not available -- confirm against
        ``cmd_ok``).

        Args:
            genome: object exposing ``filename`` and
                ``props["star"]["index_dir"]`` / ``props["star"]["index_name"]``.
            force: when true, remove the index directory first so the index
                is rebuilt from scratch.

        Raises:
            subprocess.CalledProcessError: if ``gunzip`` or ``bgzip`` exits
                with a non-zero status.
        """
        if not cmd_ok("STAR"):
            return

        # Create index dir
        index_dir = genome.props["star"]["index_dir"]
        index_name = genome.props["star"]["index_name"]
        if force:
            # Start from scratch
            rmtree(index_dir, ignore_errors=True)
        mkdir_p(index_dir)

        if os.path.exists(index_name):
            return

        # If the genome is bgzipped it needs to be unzipped first.
        # check_call raises CalledProcessError on a non-zero exit status, so
        # no separate return-code check is needed.
        fname = genome.filename
        bgzip = fname.endswith(".gz")
        if bgzip:
            sp.check_call(["gunzip", fname])
            fname = fname[: -len(".gz")]

        try:
            # Create index
            cmd = "STAR --runMode genomeGenerate --genomeFastaFiles {} --genomeDir {} --outFileNamePrefix {}".format(
                fname, index_dir, index_dir)
            run_index_cmd("star", cmd)
        finally:
            # Rezip the genome even when indexing fails, so it is never left
            # uncompressed. bgzip is distributed with tabix/htslib.
            if bgzip:
                sp.check_call(["bgzip", fname])
예제 #2
0
파일: hisat2.py 프로젝트: siebrenf/genomepy
    def after_genome_download(self, genome, force=False):
        """Build a HISAT2 index for ``genome`` unless ``.ht2`` files exist.

        Returns without doing anything when ``cmd_ok("hisat2-build")`` is
        falsy (presumably: the executable is not available -- confirm against
        ``cmd_ok``).

        Args:
            genome: object exposing ``filename`` and
                ``props["hisat2"]["index_dir"]`` /
                ``props["hisat2"]["index_name"]``.
            force: when true, remove the index directory first so the index
                is rebuilt from scratch.

        Raises:
            subprocess.CalledProcessError: if ``gunzip`` or ``bgzip`` exits
                with a non-zero status.
        """
        if not cmd_ok("hisat2-build"):
            return

        # Create index dir
        index_dir = genome.props["hisat2"]["index_dir"]
        index_name = genome.props["hisat2"]["index_name"]
        if force:
            # Start from scratch
            rmtree(index_dir, ignore_errors=True)
        mkdir_p(index_dir)

        # Any existing .ht2 file is taken as proof that the index was built.
        if any(entry.endswith(".ht2") for entry in os.listdir(index_dir)):
            return

        # If the genome is bgzipped it needs to be unzipped first.
        # check_call raises CalledProcessError on a non-zero exit status, so
        # no separate return-code check is needed.
        fname = genome.filename
        bgzip = fname.endswith(".gz")
        if bgzip:
            sp.check_call(["gunzip", fname])
            fname = fname[: -len(".gz")]

        try:
            # Create index
            cmd = "hisat2-build {} {}".format(fname, index_name)
            run_index_cmd("hisat2", cmd)
        finally:
            # Rezip the genome even when indexing fails, so it is never left
            # uncompressed. bgzip is distributed with tabix/htslib.
            if bgzip:
                sp.check_call(["bgzip", fname])
예제 #3
0
    def after_genome_download(self, genome):
        """Build a GMAP index for ``genome``.

        Returns without doing anything when ``cmd_ok("gmap_build")`` is falsy
        (presumably: the executable is not available -- confirm against
        ``cmd_ok``). There is no "already built" check in this method.

        Args:
            genome: object exposing ``filename``, ``name`` and
                ``props["gmap"]["index_dir"]``.

        Raises:
            subprocess.CalledProcessError: if ``gunzip`` or ``bgzip`` exits
                with a non-zero status.
        """
        if not cmd_ok("gmap_build"):
            return

        # Create index dir
        index_dir = genome.props["gmap"]["index_dir"]
        mkdir_p(index_dir)

        # If the genome is bgzipped it needs to be unzipped first.
        # check_call raises CalledProcessError on a non-zero exit status, so
        # no separate return-code check is needed.
        fname = genome.filename
        bgzip = fname.endswith(".gz")
        if bgzip:
            sp.check_call(["gunzip", fname])
            fname = fname[: -len(".gz")]

        try:
            # Create index. Use the unzipped name: after gunzip the original
            # ``genome.filename`` (ending in .gz) no longer exists on disk.
            cmd = "gmap_build -D {} -d {} {}".format(index_dir, genome.name,
                                                     fname)
            run_index_cmd("gmap", cmd)
        finally:
            # Rezip the genome even when indexing fails, so it is never left
            # uncompressed. bgzip is distributed with tabix/htslib.
            if bgzip:
                sp.check_call(["bgzip", fname])
예제 #4
0
파일: gmap.py 프로젝트: masastat/genomepy
    def after_genome_download(self, genome, threads=1, force=False):
        """Build a GMAP index for ``genome`` unless the index dir exists.

        Returns without doing anything when ``cmd_ok("gmap_build")`` is falsy
        (presumably: the executable is not available -- confirm against
        ``cmd_ok``).

        Args:
            genome: object exposing ``filename``, ``name`` and
                ``plugin["gmap"]["index_dir"]``.
            threads: accepted for interface parity; not used by this method.
            force: when true, remove the index directory first so the index
                is rebuilt from scratch.
        """
        if not cmd_ok("gmap_build"):
            return

        # Create index dir
        index_dir = genome.plugin["gmap"]["index_dir"]
        if force:
            # Start from scratch
            rm_rf(index_dir)

        if os.path.exists(index_dir):
            return

        # unzip genome if zipped and return up-to-date genome name
        fname, bgzip = gunzip_and_name(genome.filename)
        try:
            # gmap outputs a folder named genome.name
            # its content is moved to index dir, consistent with other plugins
            tmp_dir = mkdtemp(dir=".")
            try:
                # Create index
                cmd = f"gmap_build -D {tmp_dir} -d {genome.name} {fname}"
                run_index_cmd("gmap", cmd)

                # Move files to index_dir
                src = os.path.join(tmp_dir, genome.name)
                move(src, index_dir)
            finally:
                # Always remove the scratch directory, even if indexing failed
                rm_rf(tmp_dir)
        finally:
            # re-zip genome if unzipped, also when indexing failed
            bgzip_and_name(fname, bgzip)
예제 #5
0
    def after_genome_download(self, genome, threads=1, force=False):
        """Build a STAR index, adding the GTF annotation when one is present.

        Nothing happens when ``cmd_ok("STAR")`` is falsy, or when the index
        path already exists and ``force`` is false. Otherwise the index
        directory is removed, recreated and filled by ``run_index_cmd``.

        Args:
            genome: object exposing ``filename``, ``annotation_gtf_file`` and
                ``plugin["star"]["index_dir"]`` / ``["index_name"]``.
            threads: value passed to STAR's ``--runThreadN``.
            force: rebuild even when the index path already exists.
        """
        star_props = genome.plugin["star"]
        index_name = star_props["index_name"]
        if not cmd_ok("STAR"):
            return
        if os.path.exists(index_name) and not force:
            return

        # Start from an empty index directory
        index_dir = star_props["index_dir"]
        rm_rf(index_dir)
        mkdir_p(index_dir)

        # extracted_file yields the file name to use while the block is
        # active (presumably an uncompressed copy -- confirm against helper)
        with extracted_file(genome.filename) as fname:
            cmd = (f"STAR --runMode genomeGenerate --runThreadN {threads} " +
                   f"--genomeFastaFiles {fname} --genomeDir {index_dir} " +
                   f"--outFileNamePrefix {index_dir}")

            gtf_file = genome.annotation_gtf_file
            if not gtf_file:
                # No annotation available: plain index
                logger.info("Creating STAR index without annotation file.")
                run_index_cmd("star", cmd)
                return

            # Annotation available: the index command must run while the
            # extracted annotation is still in scope
            with extracted_file(gtf_file) as _gtf_file:
                cmd += f" --sjdbGTFfile {_gtf_file}"
                run_index_cmd("star", cmd)
예제 #6
0
def test_bowtie2(genome):
    """Build a bowtie2 index and check that the expected files exist."""
    assert os.path.exists(genome.filename)
    if not cmd_ok("bowtie2"):
        # Nothing to verify when the check for the tool fails
        return

    plugin = Bowtie2Plugin()
    plugin.after_genome_download(genome)

    genome_dir = os.path.dirname(genome.filename)
    index_dir = os.path.join(genome_dir, "index", "bowtie2")
    assert os.path.exists(index_dir)

    first_index_file = os.path.join(index_dir, "{}.1.bt2".format(genome.name))
    assert os.path.exists(first_index_file)
예제 #7
0
def test_hisat2(genome):
    """Build a hisat2 index and check that the expected files exist."""
    assert os.path.exists(genome.filename)
    if not cmd_ok("hisat2-build"):
        # Nothing to verify when the check for the tool fails
        return

    plugin = Hisat2Plugin()
    plugin.after_genome_download(genome)

    genome_dir = os.path.dirname(genome.filename)
    index_dir = os.path.join(genome_dir, "index", "hisat2")
    assert os.path.exists(index_dir)

    first_index_file = os.path.join(index_dir, "{}.1.ht2".format(genome.name))
    assert os.path.exists(first_index_file)
예제 #8
0
def test_gmap(genome):
    """Build a gmap index and check that the expected files exist."""
    assert os.path.exists(genome.filename)
    if not cmd_ok("gmap"):
        # Nothing to verify when the check for the tool fails
        return

    plugin = GmapPlugin()
    plugin.after_genome_download(genome)

    genome_dir = os.path.dirname(genome.filename)
    # The index lives in a sub-directory named after the genome
    index_dir = os.path.join(genome_dir, "index", "gmap", genome.name)
    assert os.path.exists(index_dir)

    version_file = os.path.join(index_dir, "{}.version".format(genome.name))
    assert os.path.exists(version_file)
예제 #9
0
def test_minimap2(genome):
    """Build a minimap2 index and check that the expected files exist."""
    assert os.path.exists(genome.filename)
    if not cmd_ok("minimap2"):
        # Nothing to verify when the check for the tool fails
        return

    plugin = Minimap2Plugin()
    plugin.after_genome_download(genome)

    genome_dir = os.path.dirname(genome.filename)
    index_dir = os.path.join(genome_dir, "index", "minimap2")
    assert os.path.exists(index_dir)

    index_file = os.path.join(index_dir, "{}.mmi".format(genome.name))
    assert os.path.exists(index_file)
예제 #10
0
    def after_genome_download(self, genome):
        """Build a bowtie2 index for ``genome``.

        Does nothing when ``cmd_ok("bowtie2-build")`` is falsy. Otherwise the
        index directory is created and ``bowtie2-build`` is run through
        ``run_index_cmd``.

        Args:
            genome: object exposing ``filename`` and
                ``props["bowtie2"]["index_dir"]`` / ``["index_name"]``.
        """
        if cmd_ok("bowtie2-build"):
            bowtie2_props = genome.props["bowtie2"]
            target_dir = bowtie2_props["index_dir"]
            prefix = bowtie2_props["index_name"]

            # Make sure the output directory exists before indexing
            mkdir_p(target_dir)

            build_cmd = "bowtie2-build {} {}".format(genome.filename, prefix)
            run_index_cmd("bowtie2", build_cmd)
예제 #11
0
    def after_genome_download(self, genome):
        """Build a minimap2 index for ``genome``.

        Does nothing when ``cmd_ok("minimap2")`` is falsy. Otherwise the
        index directory is created and ``minimap2 -d`` is run through
        ``run_index_cmd``.

        Args:
            genome: object exposing ``filename`` and
                ``props["minimap2"]["index_dir"]`` / ``["index_name"]``.
        """
        if cmd_ok("minimap2"):
            minimap2_props = genome.props["minimap2"]
            target_dir = minimap2_props["index_dir"]
            index_file = minimap2_props["index_name"]

            # Make sure the output directory exists before indexing
            mkdir_p(target_dir)

            build_cmd = "minimap2 -d {} {}".format(index_file, genome.filename)
            run_index_cmd("minimap2", build_cmd)
예제 #12
0
    def after_genome_download(self, genome, threads=1, force=False):
        """Build a HISAT2 index, splice-aware when an annotation is present.

        Nothing happens when ``cmd_ok("hisat2-build")`` is falsy, or when
        ``<index_name>.1.ht2`` already exists and ``force`` is false.
        Otherwise the index directory is removed and recreated. When
        ``genome.annotation_gtf_file`` is set, splice-site and exon files are
        written to ``genome.genome_dir`` and passed to ``hisat2-build``.

        Args:
            genome: object exposing ``filename``, ``genome_dir``,
                ``annotation_gtf_file`` and
                ``plugin["hisat2"]["index_dir"]`` / ``["index_name"]``.
            threads: value passed to ``hisat2-build -p``.
            force: rebuild even when the index already exists.

        Raises:
            subprocess.CalledProcessError: if ``which hisat2`` or one of the
                extraction scripts exits with a non-zero status.
        """
        import shlex  # local import: only needed to quote shell arguments

        index_name = genome.plugin["hisat2"]["index_name"]
        if not cmd_ok("hisat2-build") or (
            os.path.exists(f"{index_name}.1.ht2") and not force
        ):
            return

        index_dir = genome.plugin["hisat2"]["index_dir"]
        rm_rf(index_dir)
        mkdir_p(index_dir)

        # gunzip genome if bgzipped and return up-to-date genome name
        fname, bgzip = gunzip_and_name(genome.filename)
        try:
            # index command
            cmd = f"hisat2-build -p {threads} {fname} {index_name}"

            # if an annotation is present, generate a splice-aware index
            gtf_file = genome.annotation_gtf_file
            if gtf_file:
                # gunzip if gzipped
                gtf_file, gzip_file = gunzip_and_name(gtf_file)
                try:
                    # The helper scripts sit next to the hisat2 executable and
                    # share its path as prefix. check_output waits for the
                    # process and fails fast when hisat2 cannot be located.
                    hisat_path = (
                        sp.check_output("which hisat2", shell=True)
                        .decode("utf8")
                        .strip()
                    )

                    # generate splice and exon site files to enhance indexing;
                    # paths are quoted so spaces or shell metacharacters in
                    # them cannot break the redirection
                    splice_script = hisat_path + "_extract_splice_sites.py"
                    splice_file = os.path.join(
                        genome.genome_dir, "splice_sites.txt")
                    sp.check_call(
                        "python3 {} {} > {}".format(
                            shlex.quote(str(splice_script)),
                            shlex.quote(str(gtf_file)),
                            shlex.quote(str(splice_file)),
                        ),
                        shell=True,
                    )

                    exon_script = hisat_path + "_extract_exons.py"
                    exon_file = os.path.join(genome.genome_dir, "exon_sites.txt")
                    sp.check_call(
                        "python3 {} {} > {}".format(
                            shlex.quote(str(exon_script)),
                            shlex.quote(str(gtf_file)),
                            shlex.quote(str(exon_file)),
                        ),
                        shell=True,
                    )
                finally:
                    # re-gzip annotation if gunzipped, also on failure
                    gzip_and_name(gtf_file, gzip_file)

                # update index command with annotation
                cmd += f" --ss {splice_file} --exon {exon_file}"
            else:
                print("\nCreating Hisat2 index without annotation file.")

            # Create index
            run_index_cmd("hisat2", cmd)
        finally:
            # re-bgzip genome if gunzipped, also when indexing failed
            bgzip_and_name(fname, bgzip)
예제 #13
0
    def after_genome_download(self, genome):
        """Create the minimap2 index for ``genome``.

        Returns immediately when ``cmd_ok("minimap2")`` is falsy; otherwise
        creates the index directory and runs ``minimap2 -d`` through
        ``run_index_cmd``.

        Args:
            genome: object exposing ``filename`` and
                ``props["minimap2"]["index_dir"]`` / ``["index_name"]``.
        """
        tool_available = cmd_ok("minimap2")
        if not tool_available:
            return

        settings = genome.props["minimap2"]
        out_dir, out_file = settings["index_dir"], settings["index_name"]
        mkdir_p(out_dir)

        # Index command: output file first, then the genome FASTA
        run_index_cmd(
            "minimap2",
            "minimap2 -d {} {}".format(out_file, genome.filename),
        )
예제 #14
0
    def after_genome_download(self, genome):
        """Create the bowtie2 index for ``genome``.

        Returns immediately when ``cmd_ok("bowtie2-build")`` is falsy;
        otherwise creates the index directory and runs ``bowtie2-build``
        through ``run_index_cmd``.

        Args:
            genome: object exposing ``filename`` and
                ``props["bowtie2"]["index_dir"]`` / ``["index_name"]``.
        """
        tool_available = cmd_ok("bowtie2-build")
        if not tool_available:
            return

        settings = genome.props["bowtie2"]
        out_dir, out_prefix = settings["index_dir"], settings["index_name"]
        mkdir_p(out_dir)

        # Index command: genome FASTA first, then the index prefix
        run_index_cmd(
            "bowtie2",
            "bowtie2-build {} {}".format(genome.filename, out_prefix),
        )
예제 #15
0
def test_bwa(genome):
    """Build a bwa index and check that the expected files exist."""
    assert os.path.exists(genome.filename)

    if not cmd_ok("bwa"):
        # Nothing to verify when the check for the tool fails
        return

    plugin = BwaPlugin()
    plugin.after_genome_download(genome)

    genome_dir = os.path.dirname(genome.filename)
    index_dir = os.path.join(genome_dir, "index", "bwa")
    assert os.path.exists(index_dir)

    suffix_array = os.path.join(index_dir, "{}.fa.sa".format(genome.name))
    assert os.path.exists(suffix_array)
예제 #16
0
파일: gmap.py 프로젝트: Mpaperlee/genomepy
    def after_genome_download(self, genome):
        """Build a GMAP index for ``genome``.

        Does nothing when ``cmd_ok("gmap_build")`` is falsy. Otherwise the
        index directory is created and ``gmap_build`` is run through
        ``run_index_cmd`` with ``-D index_dir -d genome.name``.

        Args:
            genome: object exposing ``filename``, ``name`` and
                ``props["gmap"]["index_dir"]``.
        """
        if not cmd_ok("gmap_build"):
            return

        # Create index dir (the unused ``index_name`` lookup was dropped:
        # gmap_build is given the directory and the genome name instead)
        index_dir = genome.props["gmap"]["index_dir"]
        mkdir_p(index_dir)

        # Create index
        cmd = "gmap_build -D {} -d {} {}".format(
                index_dir, genome.name, genome.filename)
        run_index_cmd("gmap", cmd)
예제 #17
0
    def after_genome_download(self, genome):
        """Build a GMAP index for ``genome``.

        Does nothing when ``cmd_ok("gmap_build")`` is falsy. Otherwise the
        index directory is created and ``gmap_build`` is run through
        ``run_index_cmd`` with ``-D index_dir -d genome.name``.

        Args:
            genome: object exposing ``filename``, ``name`` and
                ``props["gmap"]["index_dir"]``.
        """
        if not cmd_ok("gmap_build"):
            return

        # Create index dir (the unused ``index_name`` lookup was dropped:
        # gmap_build is given the directory and the genome name instead)
        index_dir = genome.props["gmap"]["index_dir"]
        mkdir_p(index_dir)

        # Create index
        cmd = "gmap_build -D {} -d {} {}".format(index_dir, genome.name,
                                                 genome.filename)
        run_index_cmd("gmap", cmd)
예제 #18
0
파일: bwa.py 프로젝트: Mpaperlee/genomepy
    def after_genome_download(self, genome):
        """Build a bwa index for ``genome``.

        Does nothing when ``cmd_ok("bwa")`` is falsy. Otherwise the index
        directory is created, the genome file is symlinked to the index name
        when that path does not exist yet, and ``bwa index`` is run on it.

        Args:
            genome: object exposing ``filename`` and
                ``props["bwa"]["index_dir"]`` / ``["index_name"]``.
        """
        if cmd_ok("bwa"):
            bwa_props = genome.props["bwa"]
            target_dir = bwa_props["index_dir"]
            linked_fasta = bwa_props["index_name"]
            mkdir_p(target_dir)

            # bwa is run on a link inside the index directory
            link_missing = not os.path.exists(linked_fasta)
            if link_missing:
                os.symlink(genome.filename, linked_fasta)

            run_index_cmd("bwa", "bwa index {}".format(linked_fasta))
예제 #19
0
    def after_genome_download(self, genome):
        """Create the bwa index for ``genome``.

        Returns immediately when ``cmd_ok("bwa")`` is falsy. Otherwise
        creates the index directory, symlinks the genome file to the index
        name if nothing exists at that path, and runs ``bwa index`` on it.

        Args:
            genome: object exposing ``filename`` and
                ``props["bwa"]["index_dir"]`` / ``["index_name"]``.
        """
        tool_available = cmd_ok("bwa")
        if not tool_available:
            return

        settings = genome.props["bwa"]
        out_dir, fasta_link = settings["index_dir"], settings["index_name"]
        mkdir_p(out_dir)

        # The index is built on a link to the genome inside the index dir
        if not os.path.exists(fasta_link):
            os.symlink(genome.filename, fasta_link)

        run_index_cmd("bwa", "bwa index {}".format(fasta_link))
예제 #20
0
def test_minimap2(genome, force):
    """Build a minimap2 index, check its files, then run ``force_test``."""
    assert os.path.exists(genome.filename)

    # The fixture supplies a string; only "overwrite" enables forcing
    force = force == "overwrite"
    if not cmd_ok("minimap2"):
        return

    plugin = Minimap2Plugin()
    plugin.after_genome_download(genome, force=force)

    genome_dir = os.path.dirname(genome.filename)
    index_dir = os.path.join(genome_dir, "index", "minimap2")
    index_file = os.path.join(index_dir, "{}.mmi".format(genome.name))
    assert os.path.exists(index_dir)
    assert os.path.exists(index_file)

    force_test(plugin, index_file, genome, force)
예제 #21
0
def test_hisat2(genome, force):
    """Build a hisat2 index, check its files, then run ``force_test``."""
    assert os.path.exists(genome.filename)

    # The fixture supplies a string; only "overwrite" enables forcing
    force = force == "overwrite"
    if cmd_ok("hisat2-build"):
        p = Hisat2Plugin()
        # Pass ``force`` to the initial build as well, matching the sibling
        # minimap2 and bwa tests
        p.after_genome_download(genome, force=force)
        dirname = os.path.dirname(genome.filename)
        index_dir = os.path.join(dirname, "index", "hisat2")
        fname = os.path.join(index_dir, "{}.1.ht2".format(genome.name))
        assert os.path.exists(index_dir)
        assert os.path.exists(fname)

        force_test(p, fname, genome, force)
예제 #22
0
def test_star(genome, force):
    """Build a STAR index, check its files, then run ``force_test``."""
    assert os.path.exists(genome.filename)

    # The fixture supplies a string; only "overwrite" enables forcing
    force = force == "overwrite"
    if cmd_ok("STAR"):
        p = StarPlugin()
        # Pass ``force`` to the initial build as well, matching the sibling
        # minimap2 and bwa tests
        p.after_genome_download(genome, force=force)
        dirname = os.path.dirname(genome.filename)
        index_dir = os.path.join(dirname, "index", "star")
        fname = os.path.join(index_dir, "SA")
        assert os.path.exists(index_dir)
        assert os.path.exists(fname)

        force_test(p, fname, genome, force)
예제 #23
0
def test_bwa(genome, force):
    """Build a bwa index, check its files, then run ``force_test``."""
    assert os.path.exists(genome.filename)

    # The fixture supplies a string; only "overwrite" enables forcing
    force = force == "overwrite"
    if not cmd_ok("bwa"):
        return

    plugin = BwaPlugin()
    plugin.after_genome_download(genome, force=force)

    genome_dir = os.path.dirname(genome.filename)
    index_dir = os.path.join(genome_dir, "index", "bwa")
    suffix_array = os.path.join(index_dir, "{}.fa.sa".format(genome.name))
    assert os.path.exists(index_dir)
    assert os.path.exists(suffix_array)

    force_test(plugin, suffix_array, genome, force)
예제 #24
0
    def after_genome_download(self, genome, threads=1, force=False):
        """Build a minimap2 index unless an ``.mmi`` file already exists.

        Does nothing when ``cmd_ok("minimap2")`` is falsy.

        Args:
            genome: object exposing ``filename`` and
                ``plugin["minimap2"]["index_dir"]`` / ``["index_name"]``.
            threads: value passed to ``minimap2 -t``.
            force: when true, remove the index directory first so the index
                is rebuilt from scratch.
        """
        if not cmd_ok("minimap2"):
            return

        minimap2_props = genome.plugin["minimap2"]
        index_dir = minimap2_props["index_dir"]
        index_name = minimap2_props["index_name"]
        if force:
            # Start from scratch
            rm_rf(index_dir)
        mkdir_p(index_dir)

        # An existing .mmi file means the index is already there
        for entry in os.listdir(index_dir):
            if entry.endswith(".mmi"):
                return

        cmd = f"minimap2 -t {threads} -d {index_name} {genome.filename}"
        run_index_cmd("minimap2", cmd)
예제 #25
0
    def after_genome_download(self, genome, force=False):
        """Build a bowtie2 index unless a ``.bt2`` file already exists.

        Does nothing when ``cmd_ok("bowtie2-build")`` is falsy.

        Args:
            genome: object exposing ``filename`` and
                ``props["bowtie2"]["index_dir"]`` / ``["index_name"]``.
            force: when true, remove the index directory first so the index
                is rebuilt from scratch.
        """
        if not cmd_ok("bowtie2-build"):
            return

        bowtie2_props = genome.props["bowtie2"]
        target_dir = bowtie2_props["index_dir"]
        prefix = bowtie2_props["index_name"]
        if force:
            # Start from scratch
            rmtree(target_dir, ignore_errors=True)
        mkdir_p(target_dir)

        # An existing .bt2 file means the index is already there
        for entry in os.listdir(target_dir):
            if entry.endswith(".bt2"):
                return

        build_cmd = "bowtie2-build {} {}".format(genome.filename, prefix)
        run_index_cmd("bowtie2", build_cmd)
예제 #26
0
    def after_genome_download(self, genome, threads=1, force=False):
        """Build a bwa index unless a ``.bwt`` file already exists.

        Does nothing when ``cmd_ok("bwa")`` is falsy.

        Args:
            genome: object exposing ``filename`` and
                ``plugin["bwa"]["index_dir"]`` / ``["index_name"]``.
            threads: accepted for interface parity; not used by this method.
            force: when true, remove the index directory first so the index
                is rebuilt from scratch.
        """
        if not cmd_ok("bwa"):
            return

        bwa_props = genome.plugin["bwa"]
        index_dir = bwa_props["index_dir"]
        index_name = bwa_props["index_name"]
        if force:
            # Start from scratch
            rm_rf(index_dir)
        mkdir_p(index_dir)

        # An existing .bwt file means the index is already there
        for entry in os.listdir(index_dir):
            if entry.endswith(".bwt"):
                return

        # bwa is run on a link to the genome inside the index directory
        link_missing = not os.path.exists(index_name)
        if link_missing:
            os.symlink(genome.filename, index_name)
        cmd = f"bwa index {index_name}"
        run_index_cmd("bwa", cmd)
예제 #27
0
    def after_genome_download(self, genome, force=False):
        """Build a bwa index unless a ``.bwt`` file already exists.

        Does nothing when ``cmd_ok("bwa")`` is falsy.

        Args:
            genome: object exposing ``filename`` and
                ``props["bwa"]["index_dir"]`` / ``["index_name"]``.
            force: when true, remove the index directory first so the index
                is rebuilt from scratch.
        """
        if not cmd_ok("bwa"):
            return

        bwa_props = genome.props["bwa"]
        target_dir = bwa_props["index_dir"]
        fasta_link = bwa_props["index_name"]
        if force:
            # Start from scratch
            rmtree(target_dir, ignore_errors=True)
        mkdir_p(target_dir)

        # An existing .bwt file means the index is already there
        for entry in os.listdir(target_dir):
            if entry.endswith(".bwt"):
                return

        # bwa is run on a link to the genome inside the index directory
        link_missing = not os.path.exists(fasta_link)
        if link_missing:
            os.symlink(genome.filename, fasta_link)

        run_index_cmd("bwa", "bwa index {}".format(fasta_link))
예제 #28
0
파일: star.py 프로젝트: oftensmile/genomepy
    def after_genome_download(self, genome, threads=1, force=False):
        """Build a STAR index, adding the GTF annotation when one is present.

        Nothing happens when ``cmd_ok("STAR")`` is falsy, or when the index
        path already exists and ``force`` is false. Otherwise the index
        directory is removed, recreated and filled by ``run_index_cmd``.

        Args:
            genome: object exposing ``filename``, ``annotation_gtf_file`` and
                ``plugin["star"]["index_dir"]`` / ``["index_name"]``.
            threads: value passed to STAR's ``--runThreadN``.
            force: rebuild even when the index path already exists.
        """
        index_name = genome.plugin["star"]["index_name"]
        if not cmd_ok("STAR") or (os.path.exists(index_name) and not force):
            return

        index_dir = genome.plugin["star"]["index_dir"]
        rmtree(index_dir, ignore_errors=True)
        mkdir_p(index_dir)

        # gunzip genome if bgzipped and return up-to-date genome name
        fname, bgzip = gunzip_and_name(genome.filename)
        gtf_file = None
        gzip_file = False
        try:
            # index command
            cmd = (
                f"STAR --runMode genomeGenerate --runThreadN {threads} "
                + f"--genomeFastaFiles {fname} --genomeDir {index_dir} "
                + f"--outFileNamePrefix {index_dir}"
            )

            # if an annotation is present, generate a splice-aware index
            gtf_file = genome.annotation_gtf_file
            if gtf_file:
                # gunzip if gzipped
                gtf_file, gzip_file = gunzip_and_name(gtf_file)

                # update index command with annotation
                cmd += f" --sjdbGTFfile {gtf_file}"
            else:
                print("\nCreating STAR index without annotation file.")

            # Create index
            run_index_cmd("star", cmd)
        finally:
            # Restore the compressed inputs even when indexing failed, so a
            # failed run does not leave the genome or annotation unzipped.

            # re-bgzip genome if gunzipped
            bgzip_and_name(fname, bgzip)

            # re-gzip annotation if gunzipped
            if gtf_file:
                gzip_and_name(gtf_file, gzip_file)
예제 #29
0
    def after_genome_download(self, genome, force=False):
        """Build a GMAP index for ``genome`` unless the index dir exists.

        Returns without doing anything when ``cmd_ok("gmap_build")`` is falsy
        (presumably: the executable is not available -- confirm against
        ``cmd_ok``).

        Args:
            genome: object exposing ``filename``, ``name`` and
                ``props["gmap"]["index_dir"]``.
            force: when true, remove the index directory first so the index
                is rebuilt from scratch.

        Raises:
            subprocess.CalledProcessError: if ``gunzip`` or ``bgzip`` exits
                with a non-zero status.
        """
        if not cmd_ok("gmap_build"):
            return

        # Create index dir
        index_dir = genome.props["gmap"]["index_dir"]
        if force:
            # Start from scratch
            rmtree(index_dir, ignore_errors=True)

        if os.path.exists(index_dir):
            return

        # If the genome is bgzipped it needs to be unzipped first.
        # check_call raises CalledProcessError on a non-zero exit status, so
        # no separate return-code check is needed.
        fname = genome.filename
        bgzip = fname.endswith(".gz")
        if bgzip:
            sp.check_call(["gunzip", fname])
            fname = fname[: -len(".gz")]

        try:
            # gmap outputs a folder named genome.name
            # its content is moved to index dir, consistent with other plugins
            with TemporaryDirectory() as tmpdir:
                # Create index
                cmd = "gmap_build -D {} -d {} {}".format(
                    tmpdir, genome.name, fname)
                run_index_cmd("gmap", cmd)

                # Move files to index_dir
                src = os.path.join(tmpdir, genome.name)
                move(src, index_dir)
        finally:
            # Rezip the genome even when indexing fails, so it is never left
            # uncompressed. bgzip is distributed with tabix/htslib.
            if bgzip:
                sp.check_call(["bgzip", fname])