def _build_argument_parser():
    """Create the command-line parser for the blast helper tool."""
    parser = ArgumentParser(description="Peptide/protein blast helper tool")
    parser.add_argument('--db', type=str, required=False,
                        help="Database.fasta")
    parser.add_argument('--idb', type=str, required=False,
                        help="Indexed database.fasta")
    parser.add_argument('--pep', type=str, required=False,
                        help="Peptides.fasta")
    parser.add_argument('--cont', type=int, required=False, default=5,
                        help="Minimal length of continuous fragment")
    parser.add_argument('--leftmin', type=int, required=False, default=3,
                        help="Minimal length of left fragment")
    parser.add_argument('--rightmin', type=int, required=False, default=3,
                        help="Minimal length of right fragment")
    parser.add_argument('--summin', type=int, required=False, default=8,
                        help="Minimal total matched length")
    parser.add_argument('--gapmax', type=int, required=False, default=3,
                        help="Maximal gap size")
    parser.add_argument('--threads', type=int, required=False, default=4,
                        help="Blast threads")
    parser.add_argument('--chunksize', type=int, required=False, default=None,
                        help="Force split in chunksize")
    parser.add_argument('--eval', type=float, required=False,
                        help="Required e-value for blast, otherwise estimated")
    parser.add_argument('--wordsize', type=int, required=False, default=2,
                        help="Blast word size")
    parser.add_argument('-s', action='store_true', required=False,
                        help='Use blast-short instead of blast.')
    parser.add_argument('--printold', action='store_true', required=False,
                        help='Use old filtering/printing method.')
    parser.add_argument('-S', action='store_true', required=False,
                        help='Suppress 5+ hits in 2 file.')
    parser.add_argument('--sort', action='store_true', required=False,
                        help='Sort output.')
    parser.add_argument('--keeptmp', action='store_true', required=False,
                        help='Keep temporary files.')
    parser.add_argument('--usetmp', type=str, required=False, default=None,
                        help="Use existing temporary directory. "
                             "Do not perform blast.")
    return parser


def _flatten(chunks):
    """Concatenate an iterable of sequences into one list in linear time."""
    return list(itertools.chain.from_iterable(chunks))


def main():
    """Command-line entry point: run blast (or reuse old results) and report.

    Two modes are supported:

    * Normal mode (``--pep`` plus ``--db`` or ``--idb``): ``TMP_DIR`` is
      recreated from scratch, a blast database is built with ``makeblastdb``
      unless an indexed one was given via ``--idb``, and ``parallel_blast``
      is run.
    * Reuse mode (``--usetmp DIR``): no blast is performed; the XML files
      already present in ``DIR`` are parsed. The database name is taken from
      the first ``*.pin`` file (in sorted order) found in ``DIR`` and the
      query name is fixed to ``"pep"``.

    In both modes every XML file is parsed by ``read_blast_xml`` in a pool of
    ``--threads`` worker processes, and the two result streams are written to
    ``<pep>_<db>_1.txt`` and ``<pep>_<db>_2.txt`` in the current directory.
    With ``--sort`` the entries are ordered by their second tuple element,
    descending.

    Exits with status 1 (after printing a message) when the arguments are
    insufficient, when the ``--usetmp`` directory does not exist, or when it
    contains no ``*.pin`` file.
    """
    args = _build_argument_parser().parse_args()

    if args.usetmp is not None:
        if not os.path.isdir(args.usetmp):
            print("Wrong tmp directory!")
            sys.exit(1)
        pin_files = sorted(
            name for name in os.listdir(args.usetmp) if name.endswith(".pin")
        )
        if not pin_files:
            # Previously this surfaced as a bare IndexError.
            print("No indexed database (*.pin) found in tmp directory!")
            sys.exit(1)
        args.pep = "pep"
        args.db = pin_files[0][:-len(".pin")]
        # db_name is only needed for naming the output files in this mode;
        # it used to be left unbound here, crashing before any output.
        db_name = args.db
        work_dir = args.usetmp
    else:
        if args.pep is None or (args.db is None and args.idb is None):
            print("Provide query and database or use tmp dir!")
            sys.exit(1)
        # Always start from an empty temporary directory.
        if os.path.exists(TMP_DIR):
            shutil.rmtree(TMP_DIR)
        os.mkdir(TMP_DIR)
        if args.idb:
            db_name = args.idb
        else:
            # Use only the file name so the generated database always lands
            # inside TMP_DIR, even when --db is an absolute or nested path.
            db_name = os.path.join(TMP_DIR, os.path.basename(args.db))
            makeblastdb(args.db, db_name)
        parallel_blast(args, db_name)
        work_dir = TMP_DIR

    # Parse all XML files before touching the output files, so a failure
    # during parsing does not leave freshly truncated, empty reports behind.
    pool = multiprocessing.Pool(processes=args.threads)
    try:
        per_file = pool.map(
            read_blast_xml,
            zip(getfiles(work_dir, "xml"), itertools.repeat(args)),
        )
    finally:
        # Release the worker processes whether or not parsing succeeded.
        pool.close()
        pool.join()

    # Each worker returns a (results1, results2) pair; an empty per_file
    # (no XML files) simply yields two empty reports.
    results1 = _flatten(pair[0] for pair in per_file)
    results2 = _flatten(pair[1] for pair in per_file)

    if args.sort:
        print("Sorting results...")
        by_score = operator.itemgetter(1)
        results1.sort(key=by_score, reverse=True)
        results2.sort(key=by_score, reverse=True)

    out_prefix = args.pep + "_" + os.path.basename(db_name)
    with open(out_prefix + "_1.txt", "w") as out1:
        for lines, _ in results1:
            out1.writelines(lines)
    with open(out_prefix + "_2.txt", "w") as out2:
        for lines, _ in results2:
            out2.writelines(lines)

    # A user-supplied directory is never removed; TMP_DIR only on request.
    if not (args.keeptmp or args.usetmp):
        shutil.rmtree(TMP_DIR)