Beispiel #1
0
def setup_db(work_dir, db_dir, target_org, fs, lfs):
    """Copy over BLAST database files, prepping them for map availability.

    Looks up the databases for ``target_org`` with ``blast.get_org_dbs``,
    creates ``work_dir/db_dir`` through ``fs`` and copies every matching
    database file into it through ``lfs``.

    Args:
        work_dir: Base directory on the destination filesystem.
        db_dir: Directory searched for databases; it is also joined onto
            ``work_dir`` to name the destination directory.
        target_org: Organism identifier handed to ``blast.get_org_dbs``.
        fs: Destination filesystem object; must provide
            ``create_directory(path)``.
        lfs: Source filesystem object; must provide
            ``copy(src_path, dest_fs, dest_path)``.

    Returns:
        A ``(ref_info, blast_dbs, org_names)`` tuple where ``ref_info`` is a
        comma-joined string of ``"<remote path>#<base name>"`` entries,
        ``blast_dbs`` is the list of database base names and ``org_names``
        is passed through from ``blast.get_org_dbs``.
    """
    (_, org_names, db_refs) = blast.get_org_dbs(db_dir, target_org)
    work_dir = os.path.join(work_dir, db_dir)
    fs.create_directory(work_dir)
    ref_info = []
    blast_dbs = []
    for db_path in db_refs:
        blast_dbs.append(os.path.basename(db_path))
        # Match the protein (.p*) and nucleotide (.n*) files of a database.
        # Glob character classes have no alternation, so the class is
        # written as [pn]; a "|" inside it would be matched literally.
        for fname in glob.glob(db_path + ".[pn]*"):
            hdfs_ref = _hdfs_ref(work_dir, fname)
            lfs.copy(fname, fs, hdfs_ref)
            # NOTE(review): "path#name" presumably follows the Hadoop
            # distributed-cache link syntax -- confirm against the caller.
            ref_info.append("%s#%s" % (hdfs_ref, os.path.basename(hdfs_ref)))
    return ",".join(ref_info), blast_dbs, org_names
Beispiel #2
0
def main(org_config_file, config_file):
    """Run ``_process_wrapper`` over every search record using a worker pool.

    Args:
        org_config_file: Path to a YAML file providing ``target_org`` and
            ``search_file``.
        config_file: Path to a YAML file providing ``work_dir``, ``db_dir``
            and ``num_cores``.
    """
    # safe_load: yaml.load() without an explicit Loader is rejected by
    # PyYAML >= 6 and can construct arbitrary objects on older releases.
    with open(config_file) as in_handle:
        config = yaml.safe_load(in_handle)
    with open(org_config_file) as in_handle:
        org_config = yaml.safe_load(in_handle)
    if not os.path.exists(config['work_dir']):
        os.makedirs(config['work_dir'])
    (_, org_names, db_refs) = blast.get_org_dbs(config['db_dir'],
                                                org_config['target_org'])
    id_file, score_file = setup_output_files(org_config['target_org'],
                                             org_names)
    file_info = [id_file, score_file]
    pool = multiprocessing.Pool(int(config['num_cores']))
    try:
        with open(org_config['search_file']) as in_handle:
            # One argument tuple per FASTA record in the search file.
            pool.map(_process_wrapper,
                     ((rec, db_refs, file_info, config['work_dir'])
                      for rec in SeqIO.parse(in_handle, "fasta")))
    finally:
        # Release the worker processes even when mapping fails.
        pool.close()
        pool.join()
Beispiel #3
0
def main(org_config_file, config_file):
    """Run ``_process_wrapper`` over every search record using a worker pool.

    Args:
        org_config_file: Path to a YAML file providing ``target_org`` and
            ``search_file``.
        config_file: Path to a YAML file providing ``work_dir``, ``db_dir``,
            ``num_cores`` and, optionally, ``blast_cmd``.
    """
    # safe_load: yaml.load() without an explicit Loader is rejected by
    # PyYAML >= 6 and can construct arbitrary objects on older releases.
    with open(config_file) as in_handle:
        config = yaml.safe_load(in_handle)
    with open(org_config_file) as in_handle:
        org_config = yaml.safe_load(in_handle)
    if not os.path.exists(config['work_dir']):
        os.makedirs(config['work_dir'])
    (_, org_names, db_refs) = blast.get_org_dbs(config['db_dir'],
            org_config['target_org'])
    id_file, score_file = setup_output_files(org_config['target_org'],
            org_names)
    file_info = [id_file, score_file]
    # ``blast_cmd`` is optional; workers receive None when it is not set.
    blast_cmd = config.get("blast_cmd")
    pool = multiprocessing.Pool(int(config['num_cores']))
    try:
        with open(org_config['search_file']) as in_handle:
            # One argument tuple per FASTA record in the search file.
            pool.map(_process_wrapper,
                    ((rec, db_refs, file_info, config['work_dir'], blast_cmd)
                        for rec in SeqIO.parse(in_handle, "fasta")))
    finally:
        # Release the worker processes even when mapping fails.
        pool.close()
        pool.join()
Beispiel #4
0
def setup_db(work_dir_base, db_dir, target_org):
    """Copy over BLAST database files, prepping them for map availability.

    Looks up the databases for ``target_org`` with ``blast.get_org_dbs``,
    creates ``work_dir_base/db_dir`` with ``hadoop fs -mkdir`` and uploads
    every matching database file into it with ``hadoop fs -put``.

    Args:
        work_dir_base: Base directory on the Hadoop filesystem.
        db_dir: Directory searched for databases; it is also joined onto
            ``work_dir_base`` to name the destination directory.
        target_org: Organism identifier handed to ``blast.get_org_dbs``.

    Returns:
        A ``(ref_info, blast_dbs, org_names)`` tuple where ``ref_info`` is a
        list of ``"<remote path>#<base name>"`` strings, ``blast_dbs`` is the
        list of database base names and ``org_names`` is passed through from
        ``blast.get_org_dbs``.

    Raises:
        subprocess.CalledProcessError: If a ``hadoop`` command exits with a
            non-zero status.
    """
    (_, org_names, db_refs) = blast.get_org_dbs(db_dir, target_org)
    work_dir = os.path.join(work_dir_base, db_dir)
    cl = ["hadoop", "fs", "-mkdir", work_dir]
    subprocess.check_call(cl)
    ref_info = []
    blast_dbs = []
    for db_path in db_refs:
        blast_dbs.append(os.path.basename(db_path))
        # Match the protein (.p*) and nucleotide (.n*) files of a database.
        # Glob character classes have no alternation, so the class is
        # written as [pn]; a "|" inside it would be matched literally.
        for fname in glob.glob(db_path + ".[pn]*"):
            hdfs_ref = _hdfs_ref(work_dir, fname)
            cl = ["hadoop", "fs", "-put", fname, hdfs_ref]
            subprocess.check_call(cl)
            # NOTE(review): "path#name" presumably follows the Hadoop
            # distributed-cache link syntax -- confirm against the caller.
            ref_info.append("%s#%s" % (hdfs_ref, os.path.basename(hdfs_ref)))
    return ref_info, blast_dbs, org_names
Beispiel #5
0
def main(org_config_file, config_file):
    """Write tab-delimited id and score tables for the configured search.

    Both output files start with a header row made of an empty cell followed
    by the organism names; the remaining rows are produced by ``_do_work``.

    Args:
        org_config_file: Path to a YAML file providing ``target_org`` and
            ``search_file``.
        config_file: Path to a YAML file providing ``work_dir`` and
            ``db_dir``; the whole mapping is also passed to ``_do_work``.
    """
    # safe_load: yaml.load() without an explicit Loader is rejected by
    # PyYAML >= 6 and can construct arbitrary objects on older releases.
    with open(config_file) as in_handle:
        config = yaml.safe_load(in_handle)
    with open(org_config_file) as in_handle:
        org_config = yaml.safe_load(in_handle)
    if not os.path.exists(config["work_dir"]):
        os.makedirs(config["work_dir"])
    (_, org_names, db_refs) = blast.get_org_dbs(config["db_dir"], org_config["target_org"])
    id_file, score_file = setup_output_files(org_config["target_org"])
    with open(org_config["search_file"]) as in_handle:
        with open(id_file, "w") as id_out_handle:
            with open(score_file, "w") as score_out_handle:
                id_writer = csv.writer(id_out_handle, dialect="excel-tab")
                score_writer = csv.writer(score_out_handle, dialect="excel-tab")
                # Leading empty cell sits above the row-label column.
                header = [""] + org_names
                id_writer.writerow(header)
                score_writer.writerow(header)
                _do_work(db_refs, in_handle, id_writer, score_writer, config)
Beispiel #6
0
def main(org_config_file, config_file):
    """Write tab-delimited id and score tables for the configured search.

    Both output files start with a header row made of an empty cell followed
    by the organism names; the remaining rows are produced by ``_do_work``.

    Args:
        org_config_file: Path to a YAML file providing ``target_org`` and
            ``search_file``.
        config_file: Path to a YAML file providing ``work_dir`` and
            ``db_dir``; the whole mapping is also passed to ``_do_work``.
    """
    # safe_load: yaml.load() without an explicit Loader is rejected by
    # PyYAML >= 6 and can construct arbitrary objects on older releases.
    with open(config_file) as in_handle:
        config = yaml.safe_load(in_handle)
    with open(org_config_file) as in_handle:
        org_config = yaml.safe_load(in_handle)
    if not os.path.exists(config['work_dir']):
        os.makedirs(config['work_dir'])
    (_, org_names, db_refs) = blast.get_org_dbs(config['db_dir'],
                                                org_config['target_org'])
    id_file, score_file = setup_output_files(org_config['target_org'])
    with open(org_config['search_file']) as in_handle:
        with open(id_file, "w") as id_out_handle:
            with open(score_file, "w") as score_out_handle:
                id_writer = csv.writer(id_out_handle, dialect='excel-tab')
                score_writer = csv.writer(score_out_handle,
                                          dialect='excel-tab')
                # Leading empty cell sits above the row-label column.
                header = [""] + org_names
                id_writer.writerow(header)
                score_writer.writerow(header)
                _do_work(db_refs, in_handle, id_writer, score_writer, config)