def run(args, config_file=None):
    """Run the motif-related action selected in `args`.

    At most one action is performed; the flags are checked in this order:
    ``list``, ``list_remote``, ``install``, ``build``, ``uninstall``.

    Parameters
    ----------
    args
        Parsed command line arguments (accessed by attribute).
    config_file : str, optional
        Config file path passed to `Config` and to the action helpers.
    """
    if args.list:
        # Print the names of the motif sets recorded in the config file.
        for name, _ in Config(config_file).list_motif_sets():
            print(name)
    elif args.list_remote:
        database = JasparDatabase()
        if args.database == 'jaspar_core':
            for name in database.pfms_core:
                print(f"{name:25}\t{database.name + '_CORE'}")
        else:
            for name in database.pfms_other_collections:
                print(f"{name:20}\t{database.name + '_Collections'}")
    elif args.install:
        install_motif(args, config_file)
    elif args.build:
        build_motif(args, config_file)
    elif args.uninstall:
        uninstall_motif(args, config_file)
def run(args, config_file=None):
    """Run the genome-related action selected in `args`.

    At most one action is performed; the flags are checked in this order:
    ``list``, ``list_remote``, ``search``, ``install``, ``uninstall``.

    Parameters
    ----------
    args
        Parsed command line arguments (accessed by attribute).
    config_file : str, optional
        Config file path passed to `Config` and to the action helpers.
    """
    if args.list:
        # Print the names of the genome assemblies recorded in the config.
        for name, _ in Config(config_file).list_genome_assemblies():
            print(name)
    elif args.list_remote:
        database = UcscDatabase()
        for assembly in database.assemblies:
            print(f"{assembly.id:12}\t{database.name}\t"
                  f"{assembly.description}")
    elif args.search:
        database = UcscDatabase()
        n_hits = 0
        for n_hits, assembly in enumerate(database.search(args.search), 1):
            print(f"{assembly.id:12}\t{database.name}\t"
                  f"{assembly.description}")
        # `n_hits` stays 0 only when the search yielded nothing.
        if n_hits == 0:
            logger.info(f"No match found for {args.search!r}")
    elif args.install:
        install_genome(args, config_file)
    elif args.uninstall:
        uninstall_genome(args, config_file)
def test_cli_config_set_default_motif(tmp_dir):
    """``config --set-default-motif`` stores the default motif directory."""
    rc_path = os.path.join(tmp_dir, "test_cli_config.motifscanrc")
    cli = ["config", "--set-default-motif", "/path/to/motif/root"]

    run(args=parser.parse_args(cli), config_file=rc_path)

    # Reload from disk to check the value was actually written.
    assert Config(rc_path).get_motif_dir() == "/path/to/motif/root"
def test_cli_config_set_motif(tmp_dir):
    """``config --set-motif NAME PATH`` registers a motif set path."""
    rc_path = os.path.join(tmp_dir, 'test_cli_config.motifscanrc')
    cli = ["config", "--set-motif", "motif_set", "/path/to/motif"]

    run(args=parser.parse_args(cli), config_file=rc_path)

    # Reload from disk to check the entry was actually written.
    reloaded = Config(rc_path)
    assert reloaded.has_motif_set("motif_set")
    assert reloaded.get_motif_path("motif_set") == "/path/to/motif"
def test_cli_config_set_genome(tmp_dir):
    """``config --set-genome NAME PATH`` registers a genome assembly path."""
    rc_path = os.path.join(tmp_dir, 'test_cli_config.motifscanrc')
    cli = ["config", "--set-genome", "hg19", "/path/to/genome"]

    run(args=parser.parse_args(cli), config_file=rc_path)

    # Reload from disk to check the entry was actually written.
    reloaded = Config(rc_path)
    assert reloaded.has_genome_assembly("hg19")
    assert reloaded.get_genome_path("hg19") == "/path/to/genome"
def load_installed_pfms(name):
    """Load the PFMs of an installed motif set.

    Parameters
    ----------
    name : str
        Name of the installed motif PFMs set to be loaded.

    Returns
    -------
    pfms : `MotifPfms`
        Motif set whose PFMs were read from its file in 'jaspar' format.

    Raises
    ------
    PfmsFileNotFoundError
        If the motif PFMs file does not exist.
    """
    logger.info(f"Loading motif PFMs set {name!r}")
    # The file location is derived from the motif set path in the config.
    pfms_path = pfms_path_fmt.format(Config().get_motif_path(name), name)
    if not os.path.isfile(pfms_path):
        raise PfmsFileNotFoundError(name)

    pfms = MotifPfms(name=name)
    pfms.read_pfms(path=pfms_path, format='jaspar')
    return pfms
def save_built_pwms(self):
    """Write the built motif PWMs to the file derived from the config.

    The destination is formatted from the configured path of the motif set
    `self.name`, the set name and the genome name `self.genome`.
    """
    logger.info(
        f"Saving motif PWMs {self.name!r} under assembly {self.genome!r}")
    pwms_path = pwms_path_fmt.format(
        Config().get_motif_path(self.name), self.name, self.genome)
    self.write_motifscan_pwms(pwms_path)
def test_cli_motif_install(motif_root, tmp_dir):
    """``motif --install`` creates the PFMs file and registers the set."""
    config_file = os.path.join(tmp_dir, "test_cli_motif.motifscanrc")
    # Use `tmp_dir` as the default motif root for this config file.
    setup = Config(config_file)
    setup.set_motif_dir(tmp_dir)
    setup.write()

    source_pfms = os.path.join(motif_root, "test", "test_pfms.jaspar")
    cli = ["motif", "--install", "-n", "test_motif", "-i", source_pfms]
    run(args=parser.parse_args(cli), config_file=config_file)

    installed_dir = os.path.join(tmp_dir, "test_motif")
    installed_pfms = os.path.join(installed_dir, "test_motif_pfms.jaspar")
    assert os.path.isfile(installed_pfms)

    reloaded = Config(config_file)
    assert reloaded.has_motif_set("test_motif")
    assert reloaded.get_motif_path("test_motif") == installed_dir
def test_config_init(config):
    """A new config has three sections and the user default directories."""
    raw = config._config
    assert len(raw.sections()) == 3
    for section in ('motifscan', 'genome', 'motif'):
        assert raw.has_section(section)

    assert raw.get('motifscan', 'genome_dir') == user_genome_dir
    assert raw.get('motifscan', 'motif_dir') == user_motif_dir

    # Without an explicit path the config points at the user rc file.
    assert Config(path=None).path == user_rc_path
def install_genome(args, config_file=None):
    """Install a genome assembly and register it in the config file.

    The sequence and gene annotation files are either downloaded through
    `UcscDatabase` (when ``args.remote`` is set) or taken from the local
    files given in ``args.fasta_files`` / ``args.gene_file``. Nucleotide
    background frequencies are then calculated from the sequence file and
    written next to it, and the genome path is saved in the config.

    Exits with status 1 if the assembly name is already registered, the
    target directory is not empty, or the remote genome is not found.

    Parameters
    ----------
    args
        Parsed command line arguments; reads ``name``, ``output_dir``,
        ``remote``, ``clean``, ``fasta_files`` and ``gene_file``.
    config_file : str, optional
        Config file path passed to `Config`.
    """
    config = Config(config_file)
    if config.has_genome_assembly(args.name):
        logger.error(f"Genome assembly {args.name!r} already exists!")
        sys.exit(1)

    # An explicit output directory wins over the configured genome root.
    if args.output_dir:
        genome_dir = args.output_dir
    else:
        genome_dir = os.path.join(config.get_genome_dir(), args.name)
    genome_dir = os.path.abspath(genome_dir)
    logger.info(f"Installing genome assembly {args.name!r} into {genome_dir}")

    os.makedirs(genome_dir, exist_ok=True)
    if os.listdir(genome_dir):
        logger.error("Directory not empty! Please specify another directory "
                     "or delete files under it.")
        sys.exit(1)

    fasta_path = fasta_path_fmt.format(genome_dir, args.name)
    bg_freq_path = bg_freq_path_fmt.format(genome_dir, args.name)
    gene_path = gene_path_fmt.format(genome_dir, args.name)

    if not args.remote:
        logger.info("Copying the sequence file(s)")
        merge_files(args.fasta_files, fasta_path)
        logger.info("Copying the gene annotation file")
        copy_file(args.gene_file, gene_path)
    else:
        download_dir = os.path.join(genome_dir, 'downloads')
        try:
            db = UcscDatabase()
            dst_fasta = db.download_sequence(args.remote, download_dir)
            logger.debug(f"Extracting the sequence file to {fasta_path}")
            merge_extracted_files(dst_fasta, fasta_path)

            dst_gene = db.download_gene(args.remote, download_dir)
            logger.debug(f"Extracting the gene annotation file to {gene_path}")
            merge_extracted_files(dst_gene, gene_path)

            if args.clean:
                logger.debug(f"Removing the download directory {download_dir}")
                shutil.rmtree(download_dir)
        except RemoteGenomeNotFoundError as e:
            logger.error(e)
            sys.exit(1)

    logger.info("Calculating nucleotide frequencies of the genome background")
    bg_freq = cal_bg_freq(fasta_path)
    logger.info("Writing nucleotide frequencies")
    write_bg_freq(bg_freq_path, bg_freq)

    logger.info("Updating the config file")
    config.set_genome_path(args.name, genome_dir)
    config.write()
    logger.info("Successfully installed!")
def test_cli_motif_uninstall(motif_root, tmp_dir):
    """``motif --uninstall`` removes a set and exits for an unknown name."""
    config_file = os.path.join(tmp_dir, "test_cli_motif.motifscanrc")
    setup = Config(config_file)
    setup.set_motif_dir(tmp_dir)
    setup.write()

    # NOTE(review): presumably relies on "test_motif" already being
    # registered in this config file by an earlier test -- verify the
    # scope of the `tmp_dir` fixture.
    known = parser.parse_args(["motif", "--uninstall", "test_motif"])
    run(args=known, config_file=config_file)

    assert not Config(config_file).has_motif_set("test_motif")
    assert not os.path.isdir(os.path.join(tmp_dir, "test_motif"))

    unknown = parser.parse_args(["motif", "--uninstall", "test_motif1"])
    with pytest.raises(SystemExit):
        run(args=unknown, config_file=config_file)
def test_cli_genome_uninstall(genome_root, tmp_dir):
    """``genome --uninstall`` removes a genome and exits for an unknown one."""
    config_file = os.path.join(tmp_dir, "test_cli_genome.motifscanrc")
    setup = Config(config_file)
    setup.set_genome_dir(tmp_dir)
    setup.write()

    # NOTE(review): presumably relies on "test_genome" already being
    # registered in this config file by an earlier test -- verify the
    # scope of the `tmp_dir` fixture.
    known = parser.parse_args(["genome", "--uninstall", "test_genome"])
    run(args=known, config_file=config_file)

    assert not Config(config_file).has_genome_assembly("test_genome")
    assert not os.path.isdir(os.path.join(tmp_dir, "test_genome"))

    unknown = parser.parse_args(["genome", "--uninstall", "test_genome1"])
    with pytest.raises(SystemExit):
        run(args=unknown, config_file=config_file)
def test_cli_genome_install(genome_root, tmp_dir):
    """``genome --install`` from local files yields a loadable genome."""
    config_file = os.path.join(tmp_dir, "test_cli_genome.motifscanrc")
    # Use `tmp_dir` as the default genome root for this config file.
    setup = Config(config_file)
    setup.set_genome_dir(tmp_dir)
    setup.write()

    source_dir = os.path.join(genome_root, "test")
    fasta_path = os.path.join(source_dir, "test.fa")
    gene_path = os.path.join(source_dir, "test_gene_annotation.txt")
    cli = ["genome", "--install", "-n", "test_genome",
           "-i", fasta_path, "-a", gene_path]
    run(args=parser.parse_args(cli), config_file=config_file)

    # The installed files must be readable through `Genome`.
    genome_path = os.path.join(tmp_dir, "test_genome")
    genome = Genome(name="test_genome", path=genome_path)
    assert genome.fetch_sequence("chr1", 0, 10) == "AaTtCcGgNn"
    assert genome.genes

    reloaded = Config(config_file)
    assert reloaded.has_genome_assembly("test_genome")
    assert reloaded.get_genome_path("test_genome") == genome_path
def test_cli_motif_list(tmp_dir, capsys):
    """``motif --list`` prints one registered motif set name per line."""
    config_file = os.path.join(tmp_dir, "test_cli_motif.motifscanrc")
    setup = Config(config_file)
    for set_name, set_path in (("motif_set1", "/path/to/motif1"),
                               ("motif_set2", "/path/to/motif2")):
        setup.set_motif_path(set_name, set_path)
    setup.write()

    run(args=parser.parse_args(["motif", "--list"]), config_file=config_file)

    assert capsys.readouterr().out == "motif_set1\nmotif_set2\n"
def test_cli_genome_list(tmp_dir, capsys):
    """``genome --list`` prints one registered assembly name per line."""
    config_file = os.path.join(tmp_dir, "test_cli_genome.motifscanrc")
    setup = Config(config_file)
    for assembly, assembly_path in (("hg19", "/path/to/genome1"),
                                    ("hg38", "/path/to/genome2")):
        setup.set_genome_path(assembly, assembly_path)
    setup.write()

    run(args=parser.parse_args(["genome", "--list"]), config_file=config_file)

    assert capsys.readouterr().out == "hg19\nhg38\n"
def install_motif(args, config_file=None):
    """Install a motif PFMs set and register it in the config file.

    The PFMs file is either downloaded through `JasparDatabase` (when
    ``args.remote`` is set) or merged from the local files given in
    ``args.pfm_files``. After the config is updated, `build_motif` is
    called when ``args.genome`` is set.

    Exits with status 1 if the motif set name is already registered, the
    target directory is not empty, or the remote PFMs are not found.

    Parameters
    ----------
    args
        Parsed command line arguments; reads ``name``, ``output_dir``,
        ``remote``, ``database``, ``pfm_files`` and ``genome``.
    config_file : str, optional
        Config file path passed to `Config` and `build_motif`.
    """
    config = Config(config_file)
    if config.has_motif_set(args.name):
        logger.error(f"Motif set {args.name!r} already exists!")
        sys.exit(1)

    # An explicit output directory wins over the configured motif root.
    if args.output_dir:
        motif_dir = args.output_dir
    else:
        motif_dir = os.path.join(config.get_motif_dir(), args.name)
    motif_dir = os.path.abspath(motif_dir)
    logger.info(f"Installing motif set {args.name!r} into {motif_dir}")

    os.makedirs(motif_dir, exist_ok=True)
    if os.listdir(motif_dir):
        logger.error("Directory not empty! Please specify another directory "
                     "or delete files under it.")
        sys.exit(1)

    pfms_path = pfms_path_fmt.format(motif_dir, args.name)
    if not args.remote:
        logger.info("Copying the PFMs file(s)")
        merge_files(args.pfm_files, pfms_path)
    else:
        try:
            db = JasparDatabase()
            download = (db.download_core
                        if args.database == 'jaspar_core'
                        else db.download_other_collections)
            dst_pfms = download(args.remote, motif_dir)
            logger.debug(
                f"Renaming downloaded file to {os.path.basename(pfms_path)}")
            shutil.move(dst_pfms, pfms_path)
        except RemoteMotifPFMsNotFoundError as e:
            logger.error(e)
            sys.exit(1)

    logger.info("Updating the config file")
    config.set_motif_path(args.name, motif_dir)
    config.write()
    logger.info("Successfully installed!")

    if args.genome:
        build_motif(args, config_file)
def test_cli_config_get_motif(tmp_dir, capsys):
    """``config --get-motif`` prints a known path and exits on an unknown."""
    config_file = os.path.join(tmp_dir, "test_cli_config.motifscanrc")
    config = Config(config_file)
    config.set_motif_path("motif_set", "/path/to/motif")
    config.write()

    args = parser.parse_args(["config", "--get-motif", "motif_set"])
    run(args=args, config_file=config_file)
    captured = capsys.readouterr()
    assert captured.out == "/path/to/motif\n"

    # Parse outside the `raises` block: argparse itself exits through
    # SystemExit on a usage error, which would otherwise satisfy the
    # assertion without `run` ever being called.
    args = parser.parse_args(["config", "--get-motif", "motif_set1"])
    with pytest.raises(SystemExit):
        run(args=args, config_file=config_file)
def test_cli_config_get_genome(tmp_dir, capsys):
    """``config --get-genome`` prints a known path and exits on an unknown."""
    config_file = os.path.join(tmp_dir, "test_cli_config.motifscanrc")
    config = Config(config_file)
    config.set_genome_path("hg19", "/path/to/genome")
    config.write()

    args = parser.parse_args(["config", "--get-genome", "hg19"])
    run(args=args, config_file=config_file)
    captured = capsys.readouterr()
    assert captured.out == "/path/to/genome\n"

    # Parse outside the `raises` block: argparse itself exits through
    # SystemExit on a usage error, which would otherwise satisfy the
    # assertion without `run` ever being called.
    args = parser.parse_args(["config", "--get-genome", "hg38"])
    with pytest.raises(SystemExit):
        run(args=args, config_file=config_file)
def uninstall_motif(args, config_file=None):
    """Remove an installed motif set and drop it from the config file.

    Exits with status 1 if the motif set named by ``args.uninstall`` is
    not found in the config, or if its directory cannot be removed.

    Parameters
    ----------
    args
        Parsed command line arguments; reads ``uninstall`` (the set name).
    config_file : str, optional
        Config file path passed to `Config`.
    """
    try:
        config = Config(config_file)
        path = config.get_motif_path(args.uninstall)
    except MotifSetNotFoundError as e:
        logger.error(e)
        sys.exit(1)

    logger.info(f"Uninstalling motif set {args.uninstall!r}")

    # A missing directory is fine: only the config entry is removed then.
    if os.path.isdir(path):
        logger.info(f"Removing files under {path}")
        try:
            shutil.rmtree(path)
        except Exception as e:
            logger.error(f"Failed to remove the motif directory: {e}")
            sys.exit(1)

    logger.info("Updating the config file")
    config.remove_motif_path(args.uninstall)
    config.write()
    logger.info("Successfully uninstalled!")
def __init__(self, name, path=None):
    """Load the files of a genome assembly.

    Parameters
    ----------
    name : str
        Name of the genome assembly.
    path : str, optional
        Directory holding the genome files. When not given, it is looked
        up in the config by `name`.

    Raises
    ------
    GenomeFileNotFoundError
        If the sequence file or the background frequency file is missing.
    """
    logger.info(f"Loading genome {name!r}")
    self.name = name
    self.path = path or Config().get_genome_path(self.name)

    self._fasta_path = fasta_path_fmt.format(self.path, self.name)
    self._bg_freq_path = bg_freq_path_fmt.format(self.path, self.name)
    self._gene_path = gene_path_fmt.format(self.path, self.name)

    # The sequence and background frequency files are mandatory.
    if not os.path.isfile(self._fasta_path):
        raise GenomeFileNotFoundError(self.name, 'sequence')
    self.fa = pysam.FastaFile(self._fasta_path)

    if not os.path.isfile(self._bg_freq_path):
        raise GenomeFileNotFoundError(self.name, 'background frequency')
    self.bg_freq = read_bg_freq(self._bg_freq_path)

    # The gene annotation is optional; its absence only triggers a warning.
    if os.path.isfile(self._gene_path):
        self.genes = read_gene_annotation(self._gene_path)
    else:
        logger.warning("No gene annotation file found")
        self.genes = None

    self._chroms = None
    self._chrom_sizes = None
def load_built_pwms(name, genome):
    """Load the motif PWMs built for a genome assembly.

    Parameters
    ----------
    name : str
        Name of the built motif PWMs set to be loaded.
    genome : str
        Genome assembly name under which these PWMs are built.

    Returns
    -------
    pwms : `MotifPwms`
        Motif set whose PWMs were read from its file.

    Raises
    ------
    PwmsFileNotFoundError
        If the motif PWMs file does not exist.
    """
    logger.info(f"Loading motif PWMs set {name!r} under genome {genome!r}")
    # The file location is derived from the motif set path in the config.
    pwms_path = pwms_path_fmt.format(
        Config().get_motif_path(name), name, genome)
    pwms = MotifPwms(name=name, genome=genome)
    if not os.path.isfile(pwms_path):
        raise PwmsFileNotFoundError(name, genome)

    pwms.read_motifscan_pwms(pwms_path)
    return pwms
def test_cli_config_rm_motif(tmp_dir):
    """``config --rm-motif`` drops the entry; a second removal exits."""
    config_file = os.path.join(tmp_dir, "test_cli_config.motifscanrc")
    setup = Config(config_file)
    setup.set_motif_path("motif_set", "/path/to/motif")
    setup.write()

    rm_args = parser.parse_args(["config", "--rm-motif", "motif_set"])
    run(args=rm_args, config_file=config_file)

    reloaded = Config(config_file)
    assert not reloaded.has_motif_set("motif_set")
    with pytest.raises(MotifSetNotFoundError):
        reloaded.get_motif_path("motif_set")

    # The entry is gone now, so removing it again must fail.
    with pytest.raises(SystemExit):
        run(args=rm_args, config_file=config_file)
def config(tmp_dir):
    """Return a `Config` whose file is ``.motifscanrc`` inside `tmp_dir`."""
    rc_path = os.path.join(tmp_dir, '.motifscanrc')
    return Config(rc_path)
def test_cli_config_rm_genome(tmp_dir):
    """``config --rm-genome`` drops the entry; a second removal exits."""
    config_file = os.path.join(tmp_dir, "test_cli_config.motifscanrc")
    setup = Config(config_file)
    setup.set_genome_path("hg19", "/path/to/genome")
    setup.write()

    rm_args = parser.parse_args(["config", "--rm-genome", "hg19"])
    run(args=rm_args, config_file=config_file)

    reloaded = Config(config_file)
    assert not reloaded.has_genome_assembly("hg19")
    with pytest.raises(GenomeNotFoundError):
        reloaded.get_genome_path("hg19")

    # The entry is gone now, so removing it again must fail.
    with pytest.raises(SystemExit):
        run(args=rm_args, config_file=config_file)
def test_cli_config_show(tmp_dir, capsys):
    """``config --show`` prints every section with its entries."""
    config_file = os.path.join(tmp_dir, "test_cli_config.motifscanrc")
    setup = Config(config_file)
    setup.set_genome_dir("/path/to/genome/root")
    setup.set_motif_dir("/path/to/motif/root")
    setup.set_genome_path("hg19", "/path/to/genome")
    setup.set_motif_path("motif_set", "/path/to/motif")
    setup.write()

    run(args=parser.parse_args(["config", "--show"]), config_file=config_file)

    expected = (
        "[motifscan]\n"
        "genome_dir: /path/to/genome/root\n"
        "motif_dir: /path/to/motif/root\n\n"
        "[genome]\n"
        "hg19: /path/to/genome\n\n"
        "[motif]\n"
        "motif_set: /path/to/motif\n"
    )
    assert capsys.readouterr().out == expected
def run(args, config_file=None):
    """Show or edit the configuration according to `args`.

    With ``args.show`` the configuration is printed and nothing else is
    done. Otherwise every requested get/set/remove option is applied in a
    fixed order (default dirs, genome options, motif options) and the
    config file is written once at the end if anything was changed.

    Exits with status 1 when a requested genome assembly or motif set is
    not found; changes made before that point are not written.

    Parameters
    ----------
    args
        Parsed command line arguments (accessed by attribute).
    config_file : str, optional
        Config file path passed to `Config`.
    """
    config = Config(config_file)

    if args.show:
        print("[motifscan]")
        print(f"genome_dir: {config.get_genome_dir()}")
        print(f"motif_dir: {config.get_motif_dir()}")
        print("\n[genome]")
        for name, path in config.list_genome_assemblies():
            print(f"{name}: {path}")
        print("\n[motif]")
        for name, path in config.list_motif_sets():
            print(f"{name}: {path}")
        return

    # Tracks whether the in-memory config differs from the file.
    modified = False

    if args.set_default_genome:
        logger.debug(
            "Setting the default installation path for genome assemblies")
        config.set_genome_dir(os.path.abspath(args.set_default_genome))
        modified = True

    if args.set_default_motif:
        logger.debug("Setting the default installation path for motif sets")
        config.set_motif_dir(os.path.abspath(args.set_default_motif))
        modified = True

    if args.get_genome:
        logger.debug(f"Getting the genome path of {args.get_genome!r}")
        try:
            print(config.get_genome_path(args.get_genome))
        except GenomeNotFoundError as e:
            logger.error(e)
            sys.exit(1)

    if args.set_genome:
        name = args.set_genome[0]
        path = os.path.abspath(args.set_genome[1])
        logger.debug(f"Setting the genome path for {name!r}: {path}")
        config.set_genome_path(name, path)
        modified = True

    if args.rm_genome:
        logger.debug(f"Removing the genome path for {args.rm_genome!r}")
        try:
            config.remove_genome_path(args.rm_genome)
        except GenomeNotFoundError as e:
            logger.error(e)
            sys.exit(1)
        else:
            modified = True

    if args.get_motif:
        logger.debug(f"Getting the motif path of {args.get_motif!r}")
        try:
            print(config.get_motif_path(args.get_motif))
        except MotifSetNotFoundError as e:
            logger.error(e)
            sys.exit(1)

    if args.set_motif:
        name = args.set_motif[0]
        path = os.path.abspath(args.set_motif[1])
        logger.debug(f"Setting the motif path for {name!r}: {path}")
        config.set_motif_path(name, path)
        modified = True

    if args.rm_motif:
        logger.debug(f"Removing the motif path for {args.rm_motif!r}")
        try:
            config.remove_motif_path(args.rm_motif)
        except MotifSetNotFoundError as e:
            logger.error(e)
            sys.exit(1)
        else:
            modified = True

    if modified:
        logger.debug(f"Updating the config file: {config.path}")
        config.write()
        # NOTE(review): "Done" is logged only after a write here -- confirm
        # this matches the intended placement.
        logger.debug("Done")
def test_invalid_config(data_dir):
    """Loading ``invalid.motifscanrc`` raises `InvalidConfigFileError`."""
    bad_rc_path = os.path.join(data_dir, 'invalid.motifscanrc')
    with pytest.raises(InvalidConfigFileError):
        Config(bad_rc_path)