def main(): """Entry point.""" opts = docopt(__doc__) data = tofile.read_file("%s/config.yaml" % opts["--in"]) meta = yaml.full_load(data) prefix = meta["assembly"]["prefix"] blobdir = prefix revision = meta.get("revision", 0) if revision: blobdir += ".%d" % int(revision) indir = opts["--in"] outdir = "%s/%s" % (opts["--out"], prefix) bindir = opts.get("--bin", None) Path(outdir).mkdir(parents=True, exist_ok=True) create_pipeline_directory(indir, "%s/%s.pipeline" % (indir, prefix)) transfer_files(r"%s/*.tar" % indir, outdir) transfer_files(r"%s/*.gz" % indir, outdir) transfer_files("%s/%s.pipeline" % (indir, prefix), outdir, compress=True) if Path("%s/view" % indir).is_dir(): create_static_directory( "%s/view/%s" % (indir, blobdir), "%s/%s.static" % (indir, blobdir) ) transfer_files("%s/view/%s" % (indir, blobdir), outdir, compress=True) transfer_files("%s/%s.static" % (indir, blobdir), outdir) else: transfer_files("%s/blobtools/%s" % (indir, blobdir), outdir, compress=True) transfer_files(r"%s/blastn/%s.*.out" % (indir, prefix), outdir, compress=True) transfer_files("%s/blobtools/%s.meta.yaml" % (indir, prefix), outdir, compress=True) transfer_files(r"%s/busco/%s.busco.*_odb10" % (indir, prefix), outdir) transfer_files("%s/config.yaml" % indir, outdir) transfer_files( "%s/cov_stats/%s.chunk_stats.tsv" % (indir, prefix), outdir, compress=True ) transfer_files( r"%s/diamond/%s.*.out" % (indir, prefix), outdir, rename=("diamond.", "diamond_blastx."), compress=True, ) transfer_files( "%s/diamond/%s.fasta.chunks" % (indir, prefix), outdir, rename=("fasta.chunks", "chunks.fasta"), compress=True, ) transfer_files( r"%s/diamond_blastp/%s.*.out" % (indir, prefix), outdir, rename=("diamond.", "diamond_blastp."), compress=True, ) transfer_files(r"%s/minimap/%s.*.bam" % (indir, prefix), outdir) transfer_files("%s/pipeline/%s" % (indir, prefix), outdir, compress=True) transfer_files( "%s/window_stats/%s.window_stats.tsv" % (indir, prefix), outdir, compress=True ) transfer_files( "%s/windowmasker/%s.windowmasker.fasta" % (indir, prefix), outdir, compress=True ) remove_unwanted_files(indir, bindir)
def remove_unwanted_files(indir, bindir):
    """Move unwanted files to bin or delete."""
    LOGGER.info("Removing unwanted files")
    if bindir is not None:
        LOGGER.info("Moving %s to %s", indir, bindir)
        Path(bindir).mkdir(parents=True, exist_ok=True)
        shutil.move(indir, bindir)
    else:
        LOGGER.info("Deleting %s", indir)
        shutil.rmtree(indir)


if __name__ == "__main__":
    main()
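# Assumed command line for this script, inferred only from the option keys
# used above ("--in", "--out", "--bin"); the script name and the real docopt
# usage string are not shown in this excerpt:
#
#     python transfer_completed.py --in /path/to/run --out /path/to/transfers \
#         [--bin /path/to/bin]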
def test_read_file_that_does_not_exist():
    """Test read_file with non-existent files."""
    assert tofile.read_file("nofile") is None
    assert tofile.read_file("nofile.gz") is None
    assert tofile.read_file("tests/files/nofile") is None
    assert tofile.read_file("tests/files/nofile.gz") is None
def test_read_file_that_exists():
    """Test read_file with valid files."""
    assert tofile.read_file("tests/files/infile") == "testfile content\n"
    assert tofile.read_file("tests/files/infile.gz") == "testfile content\n"
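# Both tests above assume tofile.read_file() returns file content as a string,
# transparently decompresses ".gz" files, and returns None when the path does
# not exist. A minimal, hypothetical stand-in with that behaviour (for
# illustration only; not the actual tofile implementation):
def _read_file_sketch(path):
    """Return file content as a string, gunzipping *.gz files, or None if missing."""
    import gzip
    from pathlib import Path

    if not Path(path).is_file():
        return None
    if str(path).endswith(".gz"):
        with gzip.open(path, "rt") as fh:
            return fh.read()
    with open(path) as fh:
        return fh.read()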
    project_list = set()
    for child_bioproject in children:
        sleep(0.5)
        accession = fetch_accession(child_bioproject)
        if accession is not None:
            accessions.append(accession)
            # project_list.add(child_bioproject)
            project_list.add(new_projects[child_bioproject])
    return accessions, project_list


if __name__ == "__main__":
    opts = docopt(__doc__)
    bioproject = opts["<BIOPROJECT>"]
    outdir = opts["--out"]
    os.makedirs(outdir, exist_ok=True)
    projects_file = "%s/bioprojects.processed" % outdir
    targets_file = "%s/accessions.todo" % outdir
    active_file = "%s/accessions.active" % outdir
    processed_file = "%s/accessions.processed" % outdir
    projects = tofile.read_file(projects_file)
    accessions, new_projects = fetch_bioproject_accessions(
        bioproject, projects=projects
    )
    with open(targets_file, "a+") as tfh:
        tfh.writelines([accession + "\n" for accession in accessions])
    with open(projects_file, "a+") as pfh:
        pfh.writelines([bioproject + "\n" for bioproject in new_projects])
    # TODO: introduce page numbering for when list gets long
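# Assumed workflow, inferred only from the file names above and not stated in
# this excerpt: accessions.todo acts as a work queue whose entries downstream
# steps move through accessions.active into accessions.processed, while
# bioprojects.processed records which BioProjects have already been expanded.
# Assumed invocation, with the script name and accession as placeholders:
#
#     python fetch_bioproject_accessions.py PRJXXXXXX --out /path/to/queue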