def annotate_hmm_matches(hits_file, hits_annot_file, args):
    """Write an annotated copy of an HMM hits table.

    Every non-blank, non-comment line of ``hits_file`` must hold exactly ten
    tab-separated fields (query, hit, evalue, sum_score, query_length,
    hmmfrom, hmmto, seqfrom, seqto, query_coverage).  For each line the hit
    name is cleaned with ``cleanup_og_name`` and extended with the values
    returned by ``annota.get_og_annotations``; lines whose hit is ``ERROR``
    or ``-`` are written with that marker repeated in every remaining column.

    Args:
        hits_file: Path of the tab-separated hits table to read.  When
            ``pexists(hits_file)`` is false nothing is written.
        hits_annot_file: Path of the annotated table to create (truncated if
            it already exists).
        args: Options object; only ``args.no_file_comments`` is read.  When
            it is true, the call-info line, the header line and the trailing
            statistics are left out of the output file.

    Returns:
        None.

    Raises:
        ValueError: If a data line does not split into exactly ten fields.
    """
    header_spec = ('#query_name, hit, level, evalue, sum_score, query_length, '
                   'hmmfrom, hmmto, seqfrom, seqto, query_coverage, '
                   'members_in_og, og_description, og_COG_categories')
    # A real list (not a lazy map object) because len() is taken below.
    hits_annot_header = [name.strip() for name in header_spec.split(',')]

    annota.connect()
    sys.stdout.write(
        '%s\n' % colorify("Functional annotation of hits starts now", 'green'))
    start_time = time.time()

    if not pexists(hits_file):
        return

    # Context managers close both files even if a malformed line or a failed
    # annotation lookup raises half-way through.
    with open(hits_annot_file, "w") as OUT:
        if not args.no_file_comments:
            OUT.write('%s\n' % get_call_info())
            # NOTE(review): the header starts with '#', so it is emitted
            # together with the other comment lines -- confirm this matches
            # the intended behavior of --no_file_comments.
            OUT.write('\t'.join(hits_annot_header) + '\n')

        qn = 0
        t1 = time.time()
        with open(hits_file) as hits:
            for line in hits:
                if not line.strip() or line.startswith('#'):
                    continue

                # Count first so the progress report and the final totals
                # both reflect the number of lines actually processed.
                qn += 1
                if qn % 10000 == 0:
                    total_time = time.time() - start_time
                    # Guard against a zero clock difference.
                    rate = float(qn) / total_time if total_time > 0 else 0.0
                    sys.stderr.write('%s %s %0.2f q/s (refinement)\n'
                                     % (qn, total_time, rate))
                    sys.stderr.flush()

                (query, hit, evalue, sum_score, query_length, hmmfrom, hmmto,
                 seqfrom, seqto, q_coverage) = [
                     field.strip() for field in line.split('\t')]

                if hit in ('ERROR', '-'):
                    # No usable hit: repeat the marker in every other column.
                    row = [query] + [hit] * (len(hits_annot_header) - 1)
                else:
                    hitname = cleanup_og_name(hit)
                    level, nm, desc, cats = annota.get_og_annotations(hitname)
                    row = map(str, [query, hitname, level, evalue, sum_score,
                                    query_length, hmmfrom, hmmto, seqfrom,
                                    seqto, q_coverage, nm, desc, cats])
                OUT.write('\t'.join(row) + '\n')

        elapsed_time = time.time() - t1
        # An empty input on a coarse clock can give elapsed_time == 0.0.
        rate_text = "%0.2f q/s" % (
            float(qn) / elapsed_time if elapsed_time > 0 else 0.0)
        if not args.no_file_comments:
            OUT.write('# %d queries scanned\n' % qn)
            OUT.write('# Total time (seconds): %s\n' % elapsed_time)
            OUT.write('# Rate: %s\n' % rate_text)

    sys.stdout.write('%s\n' % colorify(
        " Processed queries:%s total_time:%s rate:%s"
        % (qn, elapsed_time, rate_text), 'lblue'))
def annotate_hmm_matches(hits_file, hits_annot_file, args):
    """Write an annotated copy of an HMM hits table.

    Every non-blank, non-comment line of ``hits_file`` must hold exactly ten
    tab-separated fields (query, hit, evalue, sum_score, query_length,
    hmmfrom, hmmto, seqfrom, seqto, query_coverage).  For each line the hit
    name is cleaned with ``cleanup_og_name`` and extended with the values
    returned by ``annota.get_og_annotations``; lines whose hit is ``ERROR``
    or ``-`` are written with that marker repeated in every remaining column.

    Args:
        hits_file: Path of the tab-separated hits table to read.  When
            ``pexists(hits_file)`` is false nothing is written.
        hits_annot_file: Path of the annotated table to create (truncated if
            it already exists).
        args: Options object; only ``args.no_file_comments`` is read.  When
            it is true, the call-info line, the header line and the trailing
            statistics are left out of the output file.

    Returns:
        None.

    Raises:
        ValueError: If a data line does not split into exactly ten fields.
    """
    header_spec = ('#query_name, hit, level, evalue, sum_score, query_length, '
                   'hmmfrom, hmmto, seqfrom, seqto, query_coverage, '
                   'members_in_og, og_description, og_COG_categories')
    # A real list (not a lazy map object) because len() is taken below.
    hits_annot_header = [name.strip() for name in header_spec.split(',')]

    annota.connect()
    sys.stdout.write(
        '%s\n' % colorify("Functional annotation of hits starts now", 'green'))
    start_time = time.time()

    if not pexists(hits_file):
        return

    # Context managers close both files even if a malformed line or a failed
    # annotation lookup raises half-way through.
    with open(hits_annot_file, "w") as OUT:
        if not args.no_file_comments:
            OUT.write('%s\n' % get_call_info())
            # NOTE(review): the header starts with '#', so it is emitted
            # together with the other comment lines -- confirm this matches
            # the intended behavior of --no_file_comments.
            OUT.write('\t'.join(hits_annot_header) + '\n')

        qn = 0
        t1 = time.time()
        with open(hits_file) as hits:
            for line in hits:
                if not line.strip() or line.startswith('#'):
                    continue

                # qn is the number of lines processed so far, this one
                # included; it is reported as-is (no "+ 1") everywhere so the
                # progress, trailer and summary counts match the real total.
                qn += 1
                if qn % 10000 == 0:
                    total_time = time.time() - start_time
                    # Guard against a zero clock difference.
                    rate = float(qn) / total_time if total_time > 0 else 0.0
                    sys.stderr.write('%s %s %0.2f q/s (refinement)\n'
                                     % (qn, total_time, rate))
                    sys.stderr.flush()

                (query, hit, evalue, sum_score, query_length, hmmfrom, hmmto,
                 seqfrom, seqto, q_coverage) = [
                     field.strip() for field in line.split('\t')]

                if hit in ('ERROR', '-'):
                    # No usable hit: repeat the marker in every other column.
                    row = [query] + [hit] * (len(hits_annot_header) - 1)
                else:
                    hitname = cleanup_og_name(hit)
                    level, nm, desc, cats = annota.get_og_annotations(hitname)
                    row = map(str, [query, hitname, level, evalue, sum_score,
                                    query_length, hmmfrom, hmmto, seqfrom,
                                    seqto, q_coverage, nm, desc, cats])
                OUT.write('\t'.join(row) + '\n')

        elapsed_time = time.time() - t1
        # An empty input on a coarse clock can give elapsed_time == 0.0.
        rate_text = "%0.2f q/s" % (
            float(qn) / elapsed_time if elapsed_time > 0 else 0.0)
        if not args.no_file_comments:
            OUT.write('# %d queries scanned\n' % qn)
            OUT.write('# Total time (seconds): %s\n' % elapsed_time)
            OUT.write('# Rate: %s\n' % rate_text)

    sys.stdout.write('%s\n' % colorify(
        " Processed queries:%s total_time:%s rate:%s"
        % (qn, elapsed_time, rate_text), 'lblue'))