def map(self, context):
    """Blast the current input record and emit its top hits as JSON.

    The scratch directory and the comma-separated list of BLAST databases
    are read from the job configuration under the keys ``job.local.dir``
    and ``fasta.blastdb``. The record's input key and value are handed to
    ``blast.blast_top_hits``; the key it returns is emitted together with
    a JSON object holding the ``ids`` and ``scores`` it reported.
    """
    job_conf = context.getJobConf()
    work_dir = job_conf.get("job.local.dir")
    db_names = job_conf.get("fasta.blastdb").split(",")
    hit_key, hit_ids, hit_scores = blast.blast_top_hits(
        context.getInputKey(), context.getInputValue(), db_names, work_dir)
    payload = json.dumps(dict(ids=hit_ids, scores=hit_scores))
    context.emit(hit_key, payload)
def process_blast(rec, db_refs, tmp_dir, blast_cmd):
    """Run a BLAST for a single record and return its top hits.

    The record's ``id`` and its FASTA rendering (``rec.format("fasta")``)
    are passed to ``blast.blast_top_hits`` along with the database
    references, scratch directory and BLAST command. The id reported by
    that call is printed to stdout, and the results are returned as a
    dictionary with the keys ``cmp_id``, ``cmp_score`` and ``cur_id``.
    """
    query_id = rec.id
    query_fasta = rec.format("fasta")
    cur_id, id_info, score_info = blast.blast_top_hits(
        query_id, query_fasta, db_refs, tmp_dir, blast_cmd)
    # Parenthesized single-argument print behaves the same as the print
    # statement under Python 2.
    print(cur_id)
    result = {"cmp_id": id_info, "cmp_score": score_info, "cur_id": cur_id}
    return result
def mapper(key, rec):
    """Blast one tab-delimited record and yield its top hits.

    ``rec`` is expected to be ``"<title>\\t<sequence>"``, optionally
    preceded by one extra tab-separated field, which is dropped. The
    record identifier is the first whitespace-delimited token of the
    title. The scratch directory and the comma-separated BLAST database
    list are read from the ``job_local_dir`` and ``fasta_blastdb``
    environment variables.

    ``key`` is accepted for the mapper interface but is not used.

    Yields a single ``(cur_key, {"ids": ..., "scores": ...})`` pair built
    from the result of ``blast.blast_top_hits``.

    Raises ``KeyError`` if either environment variable is missing and
    ``ValueError`` if the record does not split into a title and a
    sequence.
    """
    tmp_dir = os.environ["job_local_dir"]
    xref_dbs = os.environ["fasta_blastdb"].split(",")
    parts = rec.split("\t")
    if len(parts) == 3:  # remove extra initial tab if present
        parts = parts[1:]
    # Unpack the cleaned-up fields. Re-splitting ``rec`` here would discard
    # the trimming above and fail on three-field records.
    title, seq = parts
    rec_id = title.split()[0]
    cur_key, ids, scores = blast.blast_top_hits(rec_id, seq, xref_dbs, tmp_dir)
    cur_val = dict(ids=ids, scores=scores)
    yield cur_key, cur_val
def process_blast(rec, db_refs, file_info, tmp_dir, blast_cmd):
    """Run a BLAST for one record, appending the results to shared files.

    ``file_info`` is a pair ``(id_file, score_file)`` of output paths.
    After ``blast.blast_top_hits`` returns, one tab-delimited row is
    appended to each file: the current id followed by the hit ids in the
    first file, and the current id followed by the hit scores in the
    second. The appends happen while ``fupdate_lock`` (defined outside
    this function) is held. The current id is then printed to stdout.
    """
    query_id = rec.id
    query_fasta = rec.format("fasta")
    cur_id, id_info, score_info = blast.blast_top_hits(
        query_id, query_fasta, db_refs, tmp_dir, blast_cmd)
    with fupdate_lock:
        id_file, score_file = file_info
        targets = ((id_file, id_info), (score_file, score_info))
        for out_path, row_vals in targets:
            with open(out_path, "a") as out_handle:
                tab_writer = csv.writer(out_handle, dialect='excel-tab')
                tab_writer.writerow([cur_id] + row_vals)
    # Parenthesized single-argument print behaves the same as the print
    # statement under Python 2.
    print(cur_id)
def process_blast(rec, db_refs, file_info, tmp_dir):
    """Run a BLAST for one record, appending the results to shared files.

    ``file_info`` is a pair ``(id_file, score_file)`` of output paths.
    The record's id and FASTA text are passed to ``blast.blast_top_hits``;
    the returned hit ids and hit scores are each written as one
    tab-delimited row, prefixed with the current id, to the end of the
    matching file. Both appends are performed while ``fupdate_lock``
    (defined outside this function) is held. The current id is printed to
    stdout afterwards.
    """
    fasta_text = rec.format("fasta")
    cur_id, id_info, score_info = blast.blast_top_hits(
        rec.id, fasta_text, db_refs, tmp_dir)
    with fupdate_lock:
        id_file, score_file = file_info
        for dest, values in zip((id_file, score_file), (id_info, score_info)):
            with open(dest, "a") as out_handle:
                row = [cur_id] + values
                csv.writer(out_handle, dialect='excel-tab').writerow(row)
    # Parenthesized single-argument print behaves the same as the print
    # statement under Python 2.
    print(cur_id)