Ejemplo n.º 1
0
def main():
    """Command-line entry point of the peptide/protein BLAST helper.

    Two modes are supported:

    * Normal mode (no ``--usetmp``): ``TMP_DIR`` is recreated from scratch,
      a BLAST database is built from ``--db`` (or the pre-indexed ``--idb``
      is used as is) and ``parallel_blast`` is run.
    * Reuse mode (``--usetmp DIR``): no BLAST is performed; the database
      name is recovered from the first ``*.pin`` file found in ``DIR`` and
      the XML files already present in ``DIR`` are parsed.

    In both modes every ``xml`` file of the working directory is handed to
    ``read_blast_xml`` in a process pool and the collected results are
    written to ``<pep>_<db>_1.txt`` and ``<pep>_<db>_2.txt`` in the current
    directory.

    Exits with status 1 (after printing a message) when the required
    arguments are missing or the directory given to ``--usetmp`` is unusable.
    """
    parser = ArgumentParser(description="Peptide/protein blast helper tool")
    parser.add_argument('--db', type=str, required=False, help="Database.fasta")
    parser.add_argument('--idb', type=str, required=False, help="Indexed database.fasta")
    parser.add_argument('--pep', type=str, required=False, help="Peptides.fasta")
    parser.add_argument('--cont', type=int, required=False, default=5,
                        help="Minimal length of continuous fragment")
    parser.add_argument('--leftmin', type=int, required=False, default=3,
                        help="Minimal length of left fragment")
    parser.add_argument('--rightmin', type=int, required=False, default=3,
                        help="Minimal length of right fragment")
    parser.add_argument('--summin', type=int, required=False, default=8,
                        help="Minimal total mathed length")
    parser.add_argument('--gapmax', type=int, required=False, default=3, help="Minimal gap size")
    parser.add_argument('--threads', type=int, required=False, default=4, help="Blast threads")
    parser.add_argument('--chunksize', type=int, required=False, default=None,
                        help="Force split in chunksize")
    parser.add_argument('--eval', type=float, required=False,
                        help="Required e-value for blast, otherwise estimated")
    parser.add_argument('--wordsize', type=int, required=False, default=2, help="Blast word size")
    parser.add_argument('-s', action='store_true', required=False,
                        help='Use blast-short instead of blast.')
    parser.add_argument('--printold', action='store_true', required=False,
                        help='Use old filtering/printing method.')
    parser.add_argument('-S', action='store_true', required=False,
                        help='Suppress 5+ hits in 2 file.')
    parser.add_argument('--sort', action='store_true', required=False, help='Sort output.')
    parser.add_argument('--keeptmp', action='store_true', required=False,
                        help='Keep temporary files.')
    # The two sentences used to be glued together without a separator.
    parser.add_argument('--usetmp', type=str, required=False, default=None,
                        help="Use existing temporary directory. Do not perform blast.")

    args = parser.parse_args()

    if args.usetmp is not None:
        # Reuse mode: everything needed must already be inside args.usetmp.
        if not os.path.isdir(args.usetmp):
            print("Wrong tmp directory!")
            sys.exit(1)
        # A "<name>.pin" file identifies the database; sorting makes the
        # choice deterministic when several are present.
        suffix = ".pin"
        indexed = sorted(f[:-len(suffix)] for f in os.listdir(args.usetmp)
                         if f.endswith(suffix))
        if not indexed:
            print("No BLAST index (.pin) found in tmp directory!")
            sys.exit(1)
        args.pep = "pep"
        args.db = indexed[0]
        # db_name is only used to build the output file names in this mode.
        db_name = args.db
        work_dir = args.usetmp
    else:
        if args.pep is None or (args.db is None and args.idb is None):
            print("Provide query and database or use tmp dir!")
            sys.exit(1)

        # Always start from an empty temporary directory.
        work_dir = TMP_DIR
        if os.path.exists(TMP_DIR):
            shutil.rmtree(TMP_DIR)
        os.mkdir(TMP_DIR)

        if args.idb:
            db_name = args.idb
        else:
            db_name = os.path.join(TMP_DIR, args.db)
            makeblastdb(args.db, db_name)
        parallel_blast(args, db_name)

    # Parse all XML files in parallel; each task is a (file, args) pair.
    pool = multiprocessing.Pool(processes=args.threads)
    try:
        parsed = pool.map(read_blast_xml,
                          zip(getfiles(work_dir, "xml"), itertools.repeat(args)))
    finally:
        # Release the worker processes even if parsing failed.
        pool.close()
        pool.join()

    # Each worker result is unpacked as a pair of lists (one per output
    # file); guard the unpacking so that "no XML files" yields empty outputs.
    if parsed:
        per_file1, per_file2 = zip(*parsed)
    else:
        per_file1, per_file2 = (), ()
    results1 = list(itertools.chain.from_iterable(per_file1))
    results2 = list(itertools.chain.from_iterable(per_file2))

    if args.sort:
        print("Sorting results...")
        # Entries are (text, key) pairs; highest key first.
        results1.sort(key=operator.itemgetter(1), reverse=True)
        results2.sort(key=operator.itemgetter(1), reverse=True)

    out_prefix = args.pep + "_" + os.path.basename(db_name)
    with open(out_prefix + "_1.txt", "w") as out1, \
         open(out_prefix + "_2.txt", "w") as out2:
        for text, _ in results1:
            out1.writelines(text)
        for text, _ in results2:
            out2.writelines(text)

    # A directory supplied through --usetmp belongs to the user: never delete it.
    if not (args.keeptmp or args.usetmp):
        shutil.rmtree(TMP_DIR)