def setup_db(work_dir, db_dir, target_org, fs, lfs):
    """Copy over BLAST database files, prepping them for map availability.

    Arguments:
      work_dir -- base directory; the databases are placed under
                  ``os.path.join(work_dir, db_dir)``.
      db_dir -- database directory handed to ``blast.get_org_dbs``.
      target_org -- organism handed to ``blast.get_org_dbs``.
      fs -- destination filesystem object; must provide
            ``create_directory(path)``.
      lfs -- source filesystem object; must provide
             ``copy(src, dest_fs, dest_path)``.

    Returns a 3-tuple:
      - comma-joined string of ``"<remote ref>#<basename>"`` entries, one per
        copied file,
      - list of database base names,
      - the organism names reported by ``blast.get_org_dbs``.
    """
    (_, org_names, db_refs) = blast.get_org_dbs(db_dir, target_org)
    work_dir = os.path.join(work_dir, db_dir)
    fs.create_directory(work_dir)
    ref_info = []
    blast_dbs = []
    for db_path in db_refs:
        blast_dbs.append(os.path.basename(db_path))
        # Glob character classes have no alternation: "[p|n]" would also
        # match a literal "|". "[pn]" selects extensions starting with
        # "p" or "n" only.
        for fname in glob.glob(db_path + ".[pn]*"):
            hdfs_ref = _hdfs_ref(work_dir, fname)
            lfs.copy(fname, fs, hdfs_ref)
            ref_info.append("%s#%s" % (hdfs_ref, os.path.basename(hdfs_ref)))
    return ",".join(ref_info), blast_dbs, org_names
def main(org_config_file, config_file):
    """Map ``_process_wrapper`` over every FASTA record of the search file.

    Arguments:
      org_config_file -- path to a YAML file; the keys read here are
                         ``target_org`` and ``search_file``.
      config_file -- path to a YAML file; the keys read here are
                     ``work_dir``, ``db_dir`` and ``num_cores``.

    Creates ``work_dir`` when it does not exist, then distributes one task
    per FASTA record over a pool of ``num_cores`` worker processes. Each task
    is the tuple ``(record, db_refs, [id_file, score_file], work_dir)``.
    """
    # safe_load needs no explicit Loader (bare yaml.load() requires one in
    # PyYAML >= 6) and never constructs arbitrary Python objects. The configs
    # are only read as key/value mappings here.
    with open(config_file) as in_handle:
        config = yaml.safe_load(in_handle)
    with open(org_config_file) as in_handle:
        org_config = yaml.safe_load(in_handle)
    if not os.path.exists(config['work_dir']):
        os.makedirs(config['work_dir'])
    (_, org_names, db_refs) = blast.get_org_dbs(config['db_dir'],
                                                org_config['target_org'])
    id_file, score_file = setup_output_files(org_config['target_org'],
                                             org_names)
    file_info = [id_file, score_file]
    pool = multiprocessing.Pool(int(config['num_cores']))
    try:
        with open(org_config['search_file']) as in_handle:
            pool.map(_process_wrapper,
                     ((rec, db_refs, file_info, config['work_dir'])
                      for rec in SeqIO.parse(in_handle, "fasta")))
    finally:
        # Always release the worker processes, even when a task raises.
        pool.close()
        pool.join()
def main(org_config_file, config_file):
    """Map ``_process_wrapper`` over every FASTA record of the search file.

    Arguments:
      org_config_file -- path to a YAML file; the keys read here are
                         ``target_org`` and ``search_file``.
      config_file -- path to a YAML file; the keys read here are
                     ``work_dir``, ``db_dir``, ``num_cores`` and the
                     optional ``blast_cmd``.

    Creates ``work_dir`` when it does not exist, then distributes one task
    per FASTA record over a pool of ``num_cores`` worker processes. Each task
    is the tuple ``(record, db_refs, [id_file, score_file], work_dir,
    blast_cmd)``, where ``blast_cmd`` is ``None`` when the key is absent
    from the config.
    """
    # safe_load needs no explicit Loader (bare yaml.load() requires one in
    # PyYAML >= 6) and never constructs arbitrary Python objects. The configs
    # are only read as key/value mappings here.
    with open(config_file) as in_handle:
        config = yaml.safe_load(in_handle)
    with open(org_config_file) as in_handle:
        org_config = yaml.safe_load(in_handle)
    if not os.path.exists(config['work_dir']):
        os.makedirs(config['work_dir'])
    (_, org_names, db_refs) = blast.get_org_dbs(config['db_dir'],
                                                org_config['target_org'])
    id_file, score_file = setup_output_files(org_config['target_org'],
                                             org_names)
    file_info = [id_file, score_file]
    pool = multiprocessing.Pool(int(config['num_cores']))
    try:
        with open(org_config['search_file']) as in_handle:
            pool.map(_process_wrapper,
                     ((rec, db_refs, file_info, config['work_dir'],
                       config.get("blast_cmd"))
                      for rec in SeqIO.parse(in_handle, "fasta")))
    finally:
        # Always release the worker processes, even when a task raises.
        pool.close()
        pool.join()
def setup_db(work_dir_base, db_dir, target_org):
    """Copy over BLAST database files, prepping them for map availability.

    Arguments:
      work_dir_base -- base directory; the databases are placed under
                       ``os.path.join(work_dir_base, db_dir)``.
      db_dir -- database directory handed to ``blast.get_org_dbs``.
      target_org -- organism handed to ``blast.get_org_dbs``.

    The target directory is created with ``hadoop fs -mkdir`` and every
    matching database file is uploaded with ``hadoop fs -put``. A failing
    command raises ``subprocess.CalledProcessError``.

    Returns a 3-tuple:
      - list of ``"<remote ref>#<basename>"`` strings, one per uploaded file,
      - list of database base names,
      - the organism names reported by ``blast.get_org_dbs``.
    """
    (_, org_names, db_refs) = blast.get_org_dbs(db_dir, target_org)
    work_dir = os.path.join(work_dir_base, db_dir)
    cl = ["hadoop", "fs", "-mkdir", work_dir]
    subprocess.check_call(cl)
    ref_info = []
    blast_dbs = []
    for db_path in db_refs:
        blast_dbs.append(os.path.basename(db_path))
        # Glob character classes have no alternation: "[p|n]" would also
        # match a literal "|". "[pn]" selects extensions starting with
        # "p" or "n" only.
        for fname in glob.glob(db_path + ".[pn]*"):
            hdfs_ref = _hdfs_ref(work_dir, fname)
            cl = ["hadoop", "fs", "-put", fname, hdfs_ref]
            subprocess.check_call(cl)
            ref_info.append("%s#%s" % (hdfs_ref, os.path.basename(hdfs_ref)))
    return ref_info, blast_dbs, org_names
def main(org_config_file, config_file):
    """Write tab-delimited id and score tables for the search file.

    Arguments:
      org_config_file -- path to a YAML file; the keys read here are
                         ``target_org`` and ``search_file``.
      config_file -- path to a YAML file; the keys read here are
                     ``work_dir`` and ``db_dir`` (the whole mapping is also
                     passed on to ``_do_work``).

    Creates ``work_dir`` when it does not exist, opens the two output files
    named by ``setup_output_files``, writes a header row (an empty first cell
    followed by the organism names) to each, and hands the open search file
    and both writers to ``_do_work``.
    """
    # safe_load needs no explicit Loader (bare yaml.load() requires one in
    # PyYAML >= 6) and never constructs arbitrary Python objects. The configs
    # are only read as key/value mappings here.
    with open(config_file) as in_handle:
        config = yaml.safe_load(in_handle)
    with open(org_config_file) as in_handle:
        org_config = yaml.safe_load(in_handle)
    if not os.path.exists(config["work_dir"]):
        os.makedirs(config["work_dir"])
    (_, org_names, db_refs) = blast.get_org_dbs(config["db_dir"],
                                                org_config["target_org"])
    id_file, score_file = setup_output_files(org_config["target_org"])
    with open(org_config["search_file"]) as in_handle:
        with open(id_file, "w") as id_out_handle:
            with open(score_file, "w") as score_out_handle:
                id_writer = csv.writer(id_out_handle, dialect="excel-tab")
                score_writer = csv.writer(score_out_handle,
                                          dialect="excel-tab")
                header = [""] + org_names
                id_writer.writerow(header)
                score_writer.writerow(header)
                _do_work(db_refs, in_handle, id_writer, score_writer, config)
def main(org_config_file, config_file):
    """Write tab-delimited id and score tables for the search file.

    Arguments:
      org_config_file -- path to a YAML file; the keys read here are
                         ``target_org`` and ``search_file``.
      config_file -- path to a YAML file; the keys read here are
                     ``work_dir`` and ``db_dir`` (the whole mapping is also
                     passed on to ``_do_work``).

    Creates ``work_dir`` when it does not exist, opens the two output files
    named by ``setup_output_files``, writes a header row (an empty first cell
    followed by the organism names) to each, and hands the open search file
    and both writers to ``_do_work``.
    """
    # safe_load needs no explicit Loader (bare yaml.load() requires one in
    # PyYAML >= 6) and never constructs arbitrary Python objects. The configs
    # are only read as key/value mappings here.
    with open(config_file) as in_handle:
        config = yaml.safe_load(in_handle)
    with open(org_config_file) as in_handle:
        org_config = yaml.safe_load(in_handle)
    if not os.path.exists(config['work_dir']):
        os.makedirs(config['work_dir'])
    (_, org_names, db_refs) = blast.get_org_dbs(config['db_dir'],
                                                org_config['target_org'])
    id_file, score_file = setup_output_files(org_config['target_org'])
    with open(org_config['search_file']) as in_handle:
        with open(id_file, "w") as id_out_handle:
            with open(score_file, "w") as score_out_handle:
                id_writer = csv.writer(id_out_handle, dialect='excel-tab')
                score_writer = csv.writer(score_out_handle,
                                          dialect='excel-tab')
                header = [""] + org_names
                id_writer.writerow(header)
                score_writer.writerow(header)
                _do_work(db_refs, in_handle, id_writer, score_writer, config)