Esempio n. 1
0
def setupdir(strains, genomedb):
    """Create the database folder layout and stage one FASTA file per strain.

    Relies on two module-level globals: ``outdir`` (the database folder to
    populate) and ``verbose`` (whether to print a progress message).

    For every strain ``s`` the records of ``<genomedb>/pep/<s>.pep.fa`` are
    written to ``<outdir>/faa/<s>.faa`` with each record id rewritten to
    ``<s>|<original id>``.

    Args:
        strains: sequence of strain names (``len()`` is taken when verbose).
        genomedb: path to the genome database folder; it must contain a
            ``pep`` sub-folder.

    Raises:
        SystemExit: with status 1 if ``genomedb`` or its ``pep`` folder is
            missing, or if a strain's peptide file does not exist.
        OSError: if ``outdir`` cannot be created for any reason other than
            already existing.
        IOError: if a strain's peptide file exists but cannot be opened.
    """
    try:
        os.makedirs(outdir)
    except OSError as exc:
        # Only "already exists" is benign; anything else (permissions, bad
        # path, ...) must not be hidden from the user.
        if exc.errno != errno.EEXIST:
            raise
        print("Database folder exists: %s" % (outdir,))

    pp.createdirs(outdir, ["faa", "m8", "out", "paranoid_output", "dmnd_tmp"])

    if not os.path.isdir(genomedb):
        print("GenomeDB folder %s doesn't exist...exiting." % (genomedb,))
        sys.exit(1)

    if not os.path.isdir(os.path.join(genomedb, "pep")):
        print("GenomeDB folder is missing a 'pep' folder...exiting.")
        sys.exit(1)

    if verbose:
        print("Formatting %s fasta files..." % (len(strains),))

    for s in strains:
        pep_path = os.path.join(genomedb, "pep", s + ".pep.fa")
        faa_path = os.path.join(outdir, "faa", s + ".faa")

        # Open the input first so that a missing strain does not leave an
        # empty output file behind.
        try:
            pep_handle = open(pep_path, "r")
        except IOError as exc:
            if exc.errno == errno.ENOENT:
                print("%s not found in database...check your strainlist."
                      % (s,))
                sys.exit(1)
            # Any other failure used to fall through with an undefined (or
            # stale, from the previous strain) handle; surface it instead.
            raise

        # Context managers guarantee both handles are closed, even if
        # parsing or writing fails part-way through.
        with pep_handle:
            with open(faa_path, "w") as faa_handle:
                for seq in SeqIO.parse(pep_handle, 'fasta'):
                    seq.id = s + "|" + str(seq.id)
                    SeqIO.write(seq, faa_handle, 'fasta')
Esempio n. 2
0
def main():
    """Run the ortholog extraction and alignment pipeline.

    Parses the command line via ``parse_args()``, publishes the module-level
    globals ``prefix``, ``outdir`` and ``use_MP`` for the helper functions,
    then runs the pipeline steps in order: ``index_hmms``, ``extract_hmms``,
    ``get_orthos``, ``align_orthos`` and ``create_master_alignment``.

    Strains come from the file named by ``args.strains`` (one per line) or,
    if that option is unset, from ``get_strains()``. Ortholog groups come
    from the file named by ``args.orthos`` or, if unset, from
    ``parse_threshold_matrix`` (when ``args.threshold`` is truthy) or
    ``parse_matrix``.

    When ``args.clean`` is set, the ``ortho_align``, ``orthos`` and ``hmms``
    folders under ``prefix`` are passed to ``pp.cleanup`` at the end.
    """
    args = parse_args()

    global prefix, outdir, use_MP
    prefix = os.path.abspath(args.prefix)
    outdir = os.path.abspath(args.outdir)

    pp.createdirs(prefix, ["orthos", "ortho_align", "hmms"])

    # Read list files through context managers so the handles are closed
    # deterministically instead of being left to the garbage collector.
    if args.strains:
        with open(os.path.abspath(args.strains), 'r') as handle:
            strains = [line.rstrip() for line in handle]
    else:
        strains = get_strains()

    if args.orthos:
        with open(os.path.abspath(args.orthos), 'r') as handle:
            orthos = [line.rstrip() for line in handle]
    elif args.threshold:
        orthos = parse_threshold_matrix(args.threshold, strains)
    else:
        orthos = parse_matrix(strains)

    # Fall back to every available core when no CPU count was requested.
    cpus = args.cpus if args.cpus else mp.cpu_count()

    use_MP = bool(args.use_MP)

    index_hmms()
    extract_hmms(orthos)

    get_orthos(orthos, strains)
    align_orthos(orthos, cpus)
    create_master_alignment(orthos, strains)

    if args.clean:
        for folder in ("ortho_align", "orthos", "hmms"):
            pp.cleanup(os.path.join(prefix, folder))
Esempio n. 3
0
def main():
    """Run the pipeline that processes a list of new strains.

    Parses the command line via ``parse_args()``, publishes the module-level
    globals ``pypath`` (folder containing the running script), ``outdir``,
    ``cpus`` and ``use_MP`` for the helper functions, creates the ``prop_*``
    working folders under ``outdir``, and then runs, in order:
    ``check_strains``, ``make_diamond_databases``, ``run_diamond``,
    ``get_genes``, ``parse_diamond``, ``run_inparanoid``,
    ``parse_inparanoid``, ``extract_fastas`` and ``pp.dump_matrices``.

    Every ``prop_*`` working folder is passed to ``pp.cleanup`` at the end,
    unconditionally.
    """
    args = parse_args()

    global pypath, outdir, cpus, use_MP
    pypath = os.path.abspath(os.path.dirname(sys.argv[0]))
    outdir = os.path.abspath(args.outdir)
    genomedb = os.path.abspath(args.genomedb)

    # Context manager so the strain list handle is closed deterministically.
    with open(os.path.abspath(args.new_strainlist), 'r') as handle:
        new_strains = [line.rstrip() for line in handle]

    # Fall back to every available core when no CPU count was requested.
    cpus = args.cpus if args.cpus else mp.cpu_count()
    use_MP = bool(args.use_MP)

    pp.createdirs(outdir, [
        "prop_faa", "prop_dmnd", "prop_m8", "prop_out", "prop_paranoid_output",
        "prop_homolog_faa"
    ])

    check_strains(new_strains, genomedb)
    make_diamond_databases(new_strains)
    run_diamond(new_strains)
    genes = get_genes(new_strains)
    parse_diamond(genes)
    run_inparanoid(new_strains, pypath)
    group_members = parse_inparanoid(new_strains)
    extract_fastas(genes, group_members)
    pp.dump_matrices(outdir)

    # Same folders as created above; the removal order is kept as it was.
    for folder in ("prop_m8", "prop_out", "prop_dmnd",
                   "prop_paranoid_output", "prop_faa", "prop_homolog_faa"):
        pp.cleanup(os.path.join(outdir, folder))
Esempio n. 4
0
def main():
    """Run the database build pipeline, in full or one stage at a time.

    Parses the command line via ``parse_args()`` and publishes the
    module-level globals ``outdir``, ``pypath``, ``cpus``, ``clean``,
    ``verbose``, ``inflate``, ``threshold``, ``multi`` and ``use_MP`` for the
    helper functions.

    ``args.mode`` selects a single stage; when it is unset every stage runs
    in this order:

    * ``multi_setup`` -- ``setupdir``, copy the strain list into ``outdir``,
      ``make_diamond_databases``, ``run_diamond``
    * ``parse`` -- ``get_genes``, ``parse_diamond``, ``run_inparanoid``
    * ``cluster`` -- ``create_abc_file``, ``mcxload``, ``mcl_cluster``,
      ``mcxdump``
    * ``extract`` -- ``hash_fastas``, ``parse_clusters``, ``parse_groups``
    * ``build`` -- ``cdhit_seqs``, ``align_groups``, ``build_hmms``,
      ``emit_consensus_seqs``, ``combine_seqs``, ``combine_homologs``,
      ``pp.dump_matrices``

    Defaults applied when an option is unset/falsy: ``cpus`` is
    ``mp.cpu_count()``, ``inflate`` is ``2.0`` and ``threshold`` is ``0``.

    Raises:
        SystemExit: with status 1 if the strain list contains a duplicate
            entry or ``args.mode`` is not one of the stage names above.
    """
    args = parse_args()
    genomedb = os.path.abspath(args.genomedb)

    # Context manager so the strain list handle is closed deterministically.
    with open(os.path.abspath(args.strainlist), 'r') as handle:
        strains = [x.rstrip() for x in handle]

    if len(set(strains)) != len(strains):
        print("Duplicate entry in strainlist! Exiting...")
        # The message promised an exit but execution used to carry on with
        # the duplicated strains; actually stop here.
        sys.exit(1)

    global outdir, pypath
    outdir = os.path.abspath(args.outdir)
    pypath = os.path.abspath(os.path.dirname(sys.argv[0]))

    if args.mode and args.mode not in (
            "multi_setup", "parse", "extract", "cluster", "build"):
        print("Unknown mode!!! Exiting...")
        sys.exit(1)

    global cpus, clean, verbose, inflate, threshold, multi, use_MP
    # Fall back to every available core when no CPU count was requested.
    cpus = args.cpus if args.cpus else mp.cpu_count()
    clean = bool(args.clean)
    verbose = bool(args.verbose)
    inflate = args.inflate if args.inflate else 2.0
    threshold = args.threshold if args.threshold else 0
    multi = bool(args.multi)
    use_MP = bool(args.use_MP)

    # With no explicit mode, every stage below runs in sequence.
    run_all = not args.mode

    if run_all or args.mode == "multi_setup":
        setupdir(strains, genomedb)
        shutil.copy(os.path.abspath(args.strainlist),
                    os.path.join(outdir, "strainlist.txt"))
        make_diamond_databases(strains)
        run_diamond(strains)

    if run_all or args.mode == "parse":
        genes = get_genes(strains)
        parse_diamond(genes, strains)
        run_inparanoid(strains)

    if run_all or args.mode == "cluster":
        if clean:
            pp.cleanup(os.path.join(outdir, "out"))
        pp.createdirs(outdir, ["mcl"])
        create_abc_file()
        mcxload()
        mcl_cluster()
        mcxdump()

    if run_all or args.mode == "extract":
        seqdata, desc, seq_number = hash_fastas()
        pp.createdirs(
            outdir,
            ["homolog_faa", "clustered", "aligned", "hmms", "consensus_seqs"])
        parse_clusters(strains, seq_number)
        parse_groups(seqdata, desc)

    if run_all or args.mode == "build":
        cdhit_seqs()
        align_groups()
        if clean:
            pp.cleanup(os.path.join(outdir, "clustered"))
        build_hmms()
        if clean:
            pp.cleanup(os.path.join(outdir, "aligned"))
        emit_consensus_seqs()
        combine_seqs()
        combine_homologs()
        if clean:
            # Remaining intermediate folders, removed in the original order.
            for folder in ("hmms", "consensus_seqs", "m8", "paranoid_output",
                           "dmnd_tmp", "faa", "homolog_faa", "mcl"):
                pp.cleanup(os.path.join(outdir, folder))
            os.remove(os.path.join(outdir, "all_strains.dmnd"))
        pp.dump_matrices(outdir)