def test_write_file_yaml(tmpdir):
    """Check that write_file stores VALUE as the expected YAML text."""
    outfile = tmpdir.mkdir("sub").join("outfile.yaml")
    tofile.write_file(abspath(outfile), VALUE)
    # The temporary directory itself should hold exactly one entry.
    entries = tmpdir.listdir()
    assert len(entries) == 1
    written = outfile.read()
    assert written == EXAMPLE_YAML
    tmpdir.remove()
def test_write_file_json(tmpdir):
    """Check that write_file stores VALUE as the expected JSON text."""
    outfile = tmpdir.mkdir("sub").join("outfile.json")
    tofile.write_file(abspath(outfile), VALUE)
    # The temporary directory itself should hold exactly one entry.
    entries = tmpdir.listdir()
    assert len(entries) == 1
    written = outfile.read()
    assert written == EXAMPLE_JSON
    tmpdir.remove()
def test_write_file_yaml_gz(tmpdir):
    """Check that writing to a ``.yaml.gz`` path yields gzip-framed output.

    Only the first two bytes are inspected: they must equal ``1f8b``.
    """
    target = tmpdir.mkdir("sub").join("outfile.yaml.gz")
    target_path = abspath(target)
    tofile.write_file(target_path, VALUE)
    assert len(tmpdir.listdir()) == 1
    with open(target_path, "rb") as handle:
        magic = binascii.hexlify(handle.read(2))
        assert magic == b"1f8b"
    tmpdir.remove()
def test_write_file_plain(tmpdir):
    """Check that write_file stores a plain string unchanged in a .txt file."""
    content = "file content\n"
    outfile = tmpdir.mkdir("sub").join("outfile.txt")
    tofile.write_file(abspath(outfile), content)
    # The temporary directory itself should hold exactly one entry.
    entries = tmpdir.listdir()
    assert len(entries) == 1
    assert outfile.read() == content
    tmpdir.remove()
def test_write_file_invalid_path():
    """Check that write_file returns False for paths in a missing directory."""
    content = "file content\n"
    # Same two targets as before, checked in the same order.
    for bad_path in ("path/does/not/exist", "path/does/not/exist.gz"):
        assert tofile.write_file(bad_path, content) is False
def main():
    """Build the config for one assembly accession and write it to disk.

    Reads the command line options with docopt, gathers assembly, taxon,
    BUSCO and read metadata through the module's helper functions, fetches
    the associated files when ``--download`` is set, and finally writes
    ``config.yaml`` into the accession's output directory.

    Exits with status 1 if no assembly URL can be found.
    """
    opts = docopt(__doc__)
    accession = opts["<ACCESSION>"]
    download = opts["--download"]

    # Database directories share one optional "_<suffix>" ending.
    dbdir = opts["--db"]
    suffix = "_%s" % opts["--db-suffix"] if opts["--db-suffix"] else ""
    buscodir = "%s/busco%s" % (dbdir, suffix)
    uniprotdir = "%s/uniprot%s" % (dbdir, suffix)
    ntdir = "%s/nt%s" % (dbdir, suffix)
    taxdumpdir = "%s/taxdump%s" % (dbdir, suffix)

    # Output always ends up in a directory named after the accession.
    outdir = opts["--out"]
    if not outdir.endswith(accession):
        outdir = "%s/%s" % (outdir, accession)
    os.makedirs(outdir, exist_ok=True)

    meta = parse_assembly_meta(accession)
    assembly_url = fetch_assembly_url(accession, opts["--api-key"])
    if assembly_url is None:
        LOGGER.error("Unable to find assembly URL")
        sys.exit(1)

    assembly_dir = "%s/assembly" % outdir
    assembly_file = "%s/%s.fasta.gz" % (assembly_dir, accession)
    assembly_report = "%s/%s.report.txt" % (assembly_dir, accession)
    syn_filename = "%s/%s.synonyms.tsv" % (assembly_dir, accession)
    cat_filename = "%s/%s.categories.tsv" % (assembly_dir, accession)
    assembly_info = {"file": assembly_file, "url": assembly_url}
    meta["assembly"].update(assembly_info)
    meta["fields"] = {
        "synonyms": {"file": syn_filename, "prefix": "insdc"},
        "categories": {"file": cat_filename},
    }

    if download:
        os.makedirs(buscodir, exist_ok=True)
        os.makedirs(assembly_dir, exist_ok=True)
        fetch_assembly_fasta(assembly_url, assembly_file)
        report_url = assembly_url.replace(
            "_genomic.fna.gz", "_assembly_report.txt"
        )
        fetch_assembly_report(
            report_url, assembly_report, cat_filename, syn_filename
        )

    taxon_meta = fetch_goat_data(meta["taxon"]["taxid"])
    add_taxon_to_meta(meta, taxon_meta)
    set_btk_version(meta)

    busco_sets = find_busco_lineages(taxon_meta["lineage"])
    if busco_sets:
        busco_info = {
            "download_dir": buscodir,
            "lineages": busco_sets,
            "basal_lineages": [
                "eukaryota_odb10",
                "bacteria_odb10",
                "archaea_odb10",
            ],
        }
        meta["busco"].update(busco_info)
    if download:
        fetch_busco_lineages(busco_sets, buscodir)

    # Search for reads by biosample first, then any accessions from --reads.
    biosample = meta["assembly"]["biosample"]
    read_accessions = [biosample] if biosample else []
    if opts["--reads"]:
        read_accessions += opts["--reads"]
    sra = assembly_reads(
        read_accessions, int(opts["--read-runs"]), opts["--platforms"]
    )
    if sra:
        if opts["--coverage"]:
            coverage = {"coverage": {"max": int(opts["--coverage"])}}
            meta["reads"].update(coverage)
        readdir = "%s/reads" % outdir
        add_reads_to_meta(meta, sra, readdir)
        if download:
            os.makedirs(readdir, exist_ok=True)
            for library in sra:
                fetch_read_files(library)

    similarity_paths = (
        ("blastn", ntdir),
        ("diamond_blastx", uniprotdir),
        ("diamond_blastp", uniprotdir),
    )
    for tool, dbpath in similarity_paths:
        meta["similarity"][tool].update({"path": dbpath})
    meta["settings"]["taxdump"] = taxdumpdir

    tofile.write_file("%s/config.yaml" % outdir, meta)
def _varying_columns_table(columns):
    """Return a header row plus data rows for the columns whose values vary.

    ``columns`` maps a column name to ``{"index": int, "list": [values]}``.
    A column is kept only when its list holds more than one distinct value.
    If no column qualifies (for example when the report had zero or one data
    rows) the first column is kept, so the table always has a key column
    instead of failing on an empty header.
    """
    header = [
        key for key, obj in columns.items() if len(set(obj["list"])) > 1
    ]
    if not header:
        # Fall back to the first (identifier) column.
        header = [next(iter(columns))]
    kept = [columns[key]["list"] for key in header]
    table = [header]
    # Every list gets one value per report row, so zip never truncates.
    table.extend(list(values) for values in zip(*kept))
    return table


def parse_assembly_report(filename, cat_filename, syn_filename):
    """Parse synonyms and assembly level into tsv files.

    Reads the tab-separated report at ``filename``, skipping lines that
    start with ``#`` and blank lines, and collects two groups of columns by
    position. For each group, the columns that hold more than one distinct
    value are written as a table (header row first) with
    ``tofile.write_file``: categories to ``cat_filename`` and synonyms to
    ``syn_filename``. When no column varies, only the identifier column is
    written.

    Args:
        filename: Path of the assembly report to read.
        cat_filename: Output path for the categories table.
        syn_filename: Output path for the synonyms table.

    Raises:
        IndexError: If a data row has fewer columns than the highest index
            used below.
    """
    # "index" is the position of the value within a tab-split report row.
    # NOTE(review): the key names presumably follow the NCBI assembly report
    # column layout - confirm against the report format.
    cats = {
        "identifier": {"index": 4, "list": []},
        "assembly_role": {"index": 1, "list": []},
        "assembly_level": {"index": 3, "list": []},
        "assembly_unit": {"index": 7, "list": []},
    }
    names = {
        "identifier": {"index": 4, "list": []},
        "name": {"index": 0, "list": []},
        "assigned_name": {"index": 2, "list": []},
        "refseq_accession": {"index": 6, "list": []},
    }
    with tofile.open_file_handle(filename) as fh:
        for line in fh:
            if line.startswith("#"):
                continue
            stripped = line.rstrip()
            if not stripped:
                # A blank line has no columns to index into.
                continue
            row = stripped.split("\t")
            for group in (cats, names):
                for obj in group.values():
                    obj["list"].append(row[obj["index"]])
    tofile.write_file(cat_filename, _varying_columns_table(cats))
    tofile.write_file(syn_filename, _varying_columns_table(names))
"bacteria_odb10", "archaea_odb10", ], }) if opts["--download"]: fetch_busco_lineages(busco_sets, buscodir) read_accessions = [] if meta["assembly"]["biosample"]: read_accessions = [meta["assembly"]["biosample"]] if opts["--reads"]: read_accessions += opts["--reads"] sra = assembly_reads(read_accessions, int(opts["--read-runs"]), opts["--platforms"]) if sra: if opts["--coverage"]: meta["reads"].update( {"coverage": { "max": int(opts["--coverage"]) }}) readdir = "%s/reads" % outdir add_reads_to_meta(meta, sra, readdir) if opts["--download"]: os.makedirs(readdir, exist_ok=True) for library in sra: fetch_read_files(library) meta["similarity"]["blastn"].update({"path": ntdir}) meta["similarity"]["diamond_blastx"].update({"path": uniprotdir}) meta["similarity"]["diamond_blastp"].update({"path": uniprotdir}) meta["settings"]["taxdump"] = taxdumpdir tofile.write_file("%s/config.yaml" % outdir, meta)