# How the sample input was produced (infname itself is assigned elsewhere):
#infname = 'head-data.csv'  # bzgrep -m100 . /shared/silo_researcher/Matsen_F/MatsenGrp/data/bcr/output_sw/C/01-C-N_merged.tsv.bz2 | sed 's/[ \t][ \t]*/,/g'|cut -f2 -d, > head-data.csv

# For every row of the input csv: run a Searcher on the row's 'seq' column and
# print one 'RESULT' row comparing, region by region, the inferred gene name
# with the (sanitized) name stored in the row's '<region>_gene' column,
# followed by the number of tries the searcher needed for each region.
with opener('r')(infname) as infile:
    # NOTE(review): germlines is not referenced in the visible part of this
    # script -- presumably it is used further down; confirm before removing.
    germlines = utils.read_germlines('../../../recombinator')
    reader = csv.DictReader(infile)
    for inline in reader:
        print('searching')
        # inline['seq'] = inline['seq'][-130:]
        searcher = Searcher(inline['seq'], debug=True, n_matches_max=2)
        searcher.search()

        # Concatenated per-region names and the output record.
        # NOTE(review): none of these three is consumed in the visible part
        # of the loop; they are kept because later code may rely on them.
        inferred_group_str = ''
        true_group_str = ''
        outline = {'seq': inline['seq']}

        # The summary row is collected here and emitted with a single print.
        # The original used Python 2 trailing-comma print statements, which
        # separate consecutive items with one space; joining on ' ' yields the
        # same row and is valid under both Python 2 and Python 3.
        # NOTE(review): assumes get_best_match_name() does not itself write to
        # stdout (otherwise its output used to land inside the row) -- confirm.
        result_fields = ['RESULT ']
        for region in utils.regions:
            inferred_name = searcher.get_best_match_name(region)
            outline[region + '_gene'] = utils.unsanitize_name(inferred_name)
            true_name = utils.sanitize_name(inline[region + '_gene'])
            inferred_group_str += inferred_name
            true_group_str += true_name
            if inferred_name == 'none':
                # the searcher found no match for this region
                result_fields.append(' none')
            elif inferred_name == true_name:
                # inferred gene agrees with the true gene
                result_fields.append(' - ')
            else:
                # inferred gene differs from the true gene
                result_fields.append(' x ')

        # Number of tries per region, each right-aligned in three columns.
        result_fields.extend('%3d' % searcher.n_tries[region]
                             for region in utils.regions)

        # The original closed the row with `print ''`, which under Python 2
        # emits one more separating space before the newline; the empty last
        # field reproduces that trailing space.
        result_fields.append('')
        print(' '.join(result_fields))
        print(' true')