Exemple #1
0
 def map(self, context):
     """Look up the top hits for the current input record and emit them.

     Reads "job.local.dir" and the comma-separated "fasta.blastdb" entry
     from the job configuration, hands the input key and value to
     blast.blast_top_hits, and emits the returned key together with a
     JSON-encoded dict holding "ids" and "scores".
     """
     job_conf = context.getJobConf()
     work_dir = job_conf.get("job.local.dir")
     db_names = job_conf.get("fasta.blastdb").split(",")
     in_key = context.getInputKey()
     in_value = context.getInputValue()
     out_key, hit_ids, hit_scores = blast.blast_top_hits(
         in_key, in_value, db_names, work_dir)
     payload = json.dumps({"ids": hit_ids, "scores": hit_scores})
     context.emit(out_key, payload)
 def map(self, context):
     """Emit the blast_top_hits result for the record held by ``context``.

     The job configuration supplies "job.local.dir" and a comma-separated
     "fasta.blastdb" list; both are forwarded to blast.blast_top_hits along
     with the input key and value. The emitted value is a JSON string of a
     dict with the keys "ids" and "scores".
     """
     conf = context.getJobConf()
     local_dir = conf.get("job.local.dir")
     databases = conf.get("fasta.blastdb").split(",")
     record_key = context.getInputKey()
     record_value = context.getInputValue()
     result_key, result_ids, result_scores = blast.blast_top_hits(
         record_key, record_value, databases, local_dir)
     encoded = json.dumps({"ids": result_ids, "scores": result_scores})
     context.emit(result_key, encoded)
Exemple #3
0
def process_blast(rec, db_refs, tmp_dir, blast_cmd):
    """Run blast_top_hits for one record and return the results as a dict.

    Args:
        rec: record exposing ``id`` and ``format("fasta")``
            (presumably a Biopython SeqRecord -- confirm against caller).
        db_refs: passed through to blast.blast_top_hits.
        tmp_dir: passed through to blast.blast_top_hits.
        blast_cmd: passed through to blast.blast_top_hits.

    Returns:
        dict with the keys "cmp_id", "cmp_score" and "cur_id", holding the
        three values returned by blast.blast_top_hits.
    """
    cur_id, id_info, score_info = blast.blast_top_hits(
        rec.id, rec.format("fasta"), db_refs, tmp_dir, blast_cmd)
    # Single-argument print() gives the same output on Python 2 and 3.
    print(cur_id)
    return {"cmp_id": id_info, "cmp_score": score_info, "cur_id": cur_id}
def mapper(key, rec):
    """Yield the blast_top_hits result for one tab-delimited record.

    Args:
        key: unused; kept for the mapper call signature.
        rec: string of the form "title<TAB>sequence", optionally preceded
            by one extra tab-separated field that is discarded.

    Yields:
        (cur_key, dict) where the dict has the keys "ids" and "scores".

    Raises:
        KeyError: if "job_local_dir" or "fasta_blastdb" is missing from
            the environment.
        ValueError: if rec does not reduce to exactly two fields.
    """
    tmp_dir = os.environ["job_local_dir"]
    xref_dbs = os.environ["fasta_blastdb"].split(",")
    parts = rec.split("\t")
    if len(parts) == 3:  # remove extra initial tab if present
        parts = parts[1:]
    # Unpack from parts (not a fresh split of rec) so the leading-field
    # removal above actually takes effect.
    title, seq = parts
    # The record identifier is the first whitespace-delimited token.
    rec_id = title.split()[0]
    cur_key, ids, scores = blast.blast_top_hits(rec_id, seq, xref_dbs, tmp_dir)
    cur_val = dict(ids=ids, scores=scores)
    yield cur_key, cur_val
Exemple #5
0
def mapper(key, rec):
    """Yield the blast_top_hits result for one tab-delimited record.

    Args:
        key: unused; kept for the mapper call signature.
        rec: string of the form "title<TAB>sequence", optionally preceded
            by one extra tab-separated field that is discarded.

    Yields:
        (cur_key, dict) where the dict has the keys "ids" and "scores".

    Raises:
        KeyError: if "job_local_dir" or "fasta_blastdb" is missing from
            the environment.
        ValueError: if rec does not reduce to exactly two fields.
    """
    tmp_dir = os.environ["job_local_dir"]
    xref_dbs = os.environ["fasta_blastdb"].split(",")
    parts = rec.split("\t")
    if len(parts) == 3:  # remove extra initial tab if present
        parts = parts[1:]
    # Use the cleaned-up field list; splitting rec again here would bring
    # back the extra leading field and break the two-value unpacking.
    title, seq = parts
    # The record identifier is the first whitespace-delimited token.
    rec_id = title.split()[0]
    cur_key, ids, scores = blast.blast_top_hits(rec_id, seq, xref_dbs, tmp_dir)
    cur_val = dict(ids=ids, scores=scores)
    yield cur_key, cur_val
Exemple #6
0
def process_blast(rec, db_refs, file_info, tmp_dir, blast_cmd):
    """Run a BLAST for one record, appending the results to shared files.

    Args:
        rec: record exposing ``id`` and ``format("fasta")``
            (presumably a Biopython SeqRecord -- confirm against caller).
        db_refs: passed through to blast.blast_top_hits.
        file_info: pair (id_file, score_file) of output file names.
        tmp_dir: passed through to blast.blast_top_hits.
        blast_cmd: passed through to blast.blast_top_hits.

    Each output file receives one tab-delimited row: the record id
    followed by the id values (id_file) or score values (score_file).
    """
    cur_id, id_info, score_info = blast.blast_top_hits(
        rec.id, rec.format("fasta"), db_refs, tmp_dir, blast_cmd)
    # fupdate_lock is defined outside this block; it is held for both
    # appends and the progress print (presumably shared between
    # concurrent workers -- confirm where it is created).
    with fupdate_lock:
        id_file, score_file = file_info
        for fname, fvals in [(id_file, id_info), (score_file, score_info)]:
            with open(fname, "a") as out_handle:
                writer = csv.writer(out_handle, dialect='excel-tab')
                # fvals must be a list for the concatenation to work.
                writer.writerow([cur_id] + fvals)
        # Single-argument print() gives the same output on Python 2 and 3.
        print(cur_id)
Exemple #7
0
def process_blast(rec, db_refs, file_info, tmp_dir):
    """Run a BLAST for one record, appending the results to shared files.

    Args:
        rec: record exposing ``id`` and ``format("fasta")``
            (presumably a Biopython SeqRecord -- confirm against caller).
        db_refs: passed through to blast.blast_top_hits.
        file_info: pair (id_file, score_file) of output file names.
        tmp_dir: passed through to blast.blast_top_hits.

    Each output file receives one tab-delimited row: the record id
    followed by the id values (id_file) or score values (score_file).
    """
    cur_id, id_info, score_info = blast.blast_top_hits(
        rec.id, rec.format("fasta"), db_refs, tmp_dir)
    # fupdate_lock is defined outside this block; it is held for both
    # appends and the progress print (presumably shared between
    # concurrent workers -- confirm where it is created).
    with fupdate_lock:
        id_file, score_file = file_info
        for fname, fvals in [(id_file, id_info), (score_file, score_info)]:
            with open(fname, "a") as out_handle:
                writer = csv.writer(out_handle, dialect='excel-tab')
                # fvals must be a list for the concatenation to work.
                writer.writerow([cur_id] + fvals)
        # Single-argument print() gives the same output on Python 2 and 3.
        print(cur_id)
Exemple #8
0
def process_blast(rec, db_refs, tmp_dir, blast_cmd):
    """Run blast_top_hits for one record and return the results as a dict.

    Args:
        rec: record exposing ``id`` and ``format("fasta")``
            (presumably a Biopython SeqRecord -- confirm against caller).
        db_refs: passed through to blast.blast_top_hits.
        tmp_dir: passed through to blast.blast_top_hits.
        blast_cmd: passed through to blast.blast_top_hits.

    Returns:
        dict with the keys "cmp_id", "cmp_score" and "cur_id", holding the
        three values returned by blast.blast_top_hits.
    """
    cur_id, id_info, score_info = blast.blast_top_hits(
        rec.id, rec.format("fasta"), db_refs, tmp_dir, blast_cmd)
    # Single-argument print() gives the same output on Python 2 and 3.
    print(cur_id)
    return {"cmp_id": id_info, "cmp_score": score_info, "cur_id": cur_id}