Example #1
def main():
    """Entry point."""
    opts = docopt(__doc__)
    data = tofile.read_file("%s/config.yaml" % opts["--in"])
    meta = yaml.full_load(data)
    prefix = meta["assembly"]["prefix"]
    blobdir = prefix
    revision = meta.get("revision", 0)
    if revision:
        blobdir += ".%d" % int(revision)
    indir = opts["--in"]
    outdir = "%s/%s" % (opts["--out"], prefix)
    bindir = opts.get("--bin", None)
    Path(outdir).mkdir(parents=True, exist_ok=True)
    # Collect the pipeline working files into <prefix>.pipeline before transfer
    create_pipeline_directory(indir, "%s/%s.pipeline" % (indir, prefix))
    transfer_files(r"%s/*.tar" % indir, outdir)
    transfer_files(r"%s/*.gz" % indir, outdir)
    transfer_files("%s/%s.pipeline" % (indir, prefix), outdir, compress=True)
    # Transfer either the exported static view or the raw blobtools directory
    if Path("%s/view" % indir).is_dir():
        create_static_directory(
            "%s/view/%s" % (indir, blobdir), "%s/%s.static" % (indir, blobdir)
        )
        transfer_files("%s/view/%s" % (indir, blobdir), outdir, compress=True)
        transfer_files("%s/%s.static" % (indir, blobdir), outdir)
    else:
        transfer_files("%s/blobtools/%s" % (indir, blobdir), outdir, compress=True)
    transfer_files(r"%s/blastn/%s.*.out" % (indir, prefix), outdir, compress=True)
    transfer_files("%s/blobtools/%s.meta.yaml" % (indir, prefix), outdir, compress=True)
    transfer_files(r"%s/busco/%s.busco.*_odb10" % (indir, prefix), outdir)
    transfer_files("%s/config.yaml" % indir, outdir)
    transfer_files(
        "%s/cov_stats/%s.chunk_stats.tsv" % (indir, prefix), outdir, compress=True
    )
    transfer_files(
        r"%s/diamond/%s.*.out" % (indir, prefix),
        outdir,
        rename=("diamond.", "diamond_blastx."),
        compress=True,
    )
    transfer_files(
        "%s/diamond/%s.fasta.chunks" % (indir, prefix),
        outdir,
        rename=("fasta.chunks", "chunks.fasta"),
        compress=True,
    )
    transfer_files(
        r"%s/diamond_blastp/%s.*.out" % (indir, prefix),
        outdir,
        rename=("diamond.", "diamond_blastp."),
        compress=True,
    )
    transfer_files(r"%s/minimap/%s.*.bam" % (indir, prefix), outdir)
    transfer_files("%s/pipeline/%s" % (indir, prefix), outdir, compress=True)
    transfer_files(
        "%s/window_stats/%s.window_stats.tsv" % (indir, prefix), outdir, compress=True
    )
    transfer_files(
        "%s/windowmasker/%s.windowmasker.fasta" % (indir, prefix), outdir, compress=True
    )
    remove_unwanted_files(indir, bindir)
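

# The transfer_files helper used throughout main() is not defined in these
# examples. The sketch below is only an assumption inferred from the calls
# above, not the real implementation: it expands a glob pattern, optionally
# renames matches by substring replacement, and optionally gzip-compresses
# plain files on the way to the destination directory.
import glob
import gzip
import shutil
from pathlib import Path


def transfer_files(pattern, destdir, rename=None, compress=False):
    """Copy files matching pattern into destdir (illustrative sketch only)."""
    for source in glob.glob(pattern):
        name = Path(source).name
        if rename is not None:
            # rename is an (old, new) substring pair, e.g. ("diamond.", "diamond_blastx.")
            name = name.replace(*rename)
        destination = "%s/%s" % (destdir, name)
        if Path(source).is_dir():
            # Assumed behaviour for directories: copy the tree as-is
            shutil.copytree(source, destination, dirs_exist_ok=True)
        elif compress:
            # Assumed behaviour: write a gzipped copy under the new name
            with open(source, "rb") as ifh, gzip.open(destination + ".gz", "wb") as ofh:
                shutil.copyfileobj(ifh, ofh)
        else:
            shutil.copy2(source, destination)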
Example #2
def remove_unwanted_files(indir, bindir):
    """Move unwanted files to bin or delete."""
    LOGGER.info("Removing unwanted files")
    if bindir is not None:
        LOGGER.info("Moving %s to %s", indir, bindir)
        Path(bindir).mkdir(parents=True, exist_ok=True)
        shutil.move(indir, bindir)
    else:
        LOGGER.info("Deleting %s" % indir)
        shutil.rmtree(indir)


if __name__ == "__main__":
    main()
Example #3
def test_read_file_that_does_not_exist():
    """Test read_file with non-existent files."""
    assert tofile.read_file("nofile") is None
    assert tofile.read_file("nofile.gz") is None
    assert tofile.read_file("tests/files/nofile") is None
    assert tofile.read_file("tests/files/nofile.gz") is None
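

# tofile.read_file comes from an external helper module that is not shown in
# these examples. The sketch below is not its real implementation, just a
# minimal version consistent with the behaviour the tests in Examples #3 and
# #4 expect: return file contents as a string, read .gz files transparently,
# and return None when the file does not exist.
import gzip
from pathlib import Path


def read_file(filename):
    """Read a plain or gzipped text file, returning None if it is missing."""
    if not Path(filename).is_file():
        return None
    if filename.endswith(".gz"):
        with gzip.open(filename, "rt") as fh:
            return fh.read()
    with open(filename, "r") as fh:
        return fh.read()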
Example #4
def test_read_file_that_exists():
    """Test read_file with a valid files."""
    assert tofile.read_file("tests/files/infile") == "testfile content\n"
    assert tofile.read_file("tests/files/infile.gz") == "testfile content\n"

    # NOTE: the lines below are the tail of a different function,
    # fetch_bioproject_accessions (called in the __main__ block further down);
    # its opening lines, which define children, accessions and new_projects,
    # are not shown in this example.
    project_list = set()
    for child_bioproject in children:
        sleep(0.5)  # throttle successive lookups
        accession = fetch_accession(child_bioproject)
        if accession is not None:
            accessions.append(accession)
            # project_list.add(child_bioproject)
            project_list.add(new_projects[child_bioproject])
    return accessions, project_list
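

# fetch_accession is not shown in these examples; the sketch below is a
# hypothetical stand-in for illustration only. It assumes the helper maps a
# BioProject to a linked assembly accession via NCBI E-utilities and returns
# None when nothing is found.
import requests

EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"


def fetch_accession_sketch(bioproject):
    """Return an assembly accession for a BioProject, or None (hypothetical sketch)."""
    search = requests.get(
        "%s/esearch.fcgi" % EUTILS,
        params={"db": "assembly", "term": bioproject, "retmode": "json"},
    ).json()
    ids = search.get("esearchresult", {}).get("idlist", [])
    if not ids:
        return None
    summary = requests.get(
        "%s/esummary.fcgi" % EUTILS,
        params={"db": "assembly", "id": ids[0], "retmode": "json"},
    ).json()
    return summary.get("result", {}).get(ids[0], {}).get("assemblyaccession") or None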


if __name__ == "__main__":
    opts = docopt(__doc__)
    bioproject = opts["<BIOPROJECT>"]
    outdir = opts["--out"]
    os.makedirs(outdir, exist_ok=True)
    projects_file = "%s/bioprojects.processed" % outdir
    targets_file = "%s/accessions.todo" % outdir
    active_file = "%s/accessions.active" % outdir
    processed_file = "%s/accessions.processed" % outdir
    projects = tofile.read_file(projects_file)

    accessions, new_projects = fetch_bioproject_accessions(bioproject,
                                                           projects=projects)
    with open(targets_file, "a+") as tfh:
        tfh.writelines([accession + "\n" for accession in accessions])
    with open(projects_file, "a+") as pfh:
        pfh.writelines([bioproject + "\n" for bioproject in new_projects])

    # TODO: introduce page numbering for when list gets long