def build_matrix_str(allres):
    """Build a csv-style text dump of allres.score_matrix and attach it
    as allres.matrix_str.

    The header records a timestamp, matrix dimensions, and the image-name
    labels of each row (query) and column (indexed chip).
    """
    hs = allres.hs
    cx2_gx = hs.tables.cx2_gx
    gx2_gname = hs.tables.gx2_gname

    def cx2_gname(cx):
        # FIX: the original ignored `cx` and indexed with the full cx2_gx
        # table, so every label list contained ALL image names instead of
        # the names of the requested row/col chips.
        return [os.path.splitext(gname)[0] for gname in gx2_gname[cx2_gx[cx]]]

    col_label_gname = cx2_gname(allres.col_label_cx)
    row_label_gname = cx2_gname(allres.row_label_cx)
    timestamp = helpers.get_timestamp(format_='comment') + '\n'
    header = '\n'.join([
        '# Result score matrix',
        '# Generated on: ' + timestamp,
        '# Format: rows separated by newlines, cols separated by commas',
        '# num_queries / rows = ' + repr(len(row_label_gname)),
        '# num_indexed / cols = ' + repr(len(col_label_gname)),
        '# row_labels = ' + repr(row_label_gname),
        '# col_labels = ' + repr(col_label_gname)])
    # One comma-separated line of fixed-width floats per matrix row
    row_strings = []
    for row in allres.score_matrix:
        row_strings.append(', '.join('%5.2f' % x for x in row))
    body = '\n'.join(row_strings)
    matrix_str = '\n'.join([header, body])
    allres.matrix_str = matrix_str
def build_matrix_str(allres):
    """Render allres.score_matrix as commented csv text in allres.matrix_str.

    Header lines (prefixed with '#') describe the generation time, the
    matrix shape, and the row/column image-name labels; the body holds one
    comma-separated row of '%5.2f' scores per query.
    """
    hs = allres.hs
    cx2_gx = hs.tables.cx2_gx
    gx2_gname = hs.tables.gx2_gname

    def cx2_gname(cx):
        # FIX: `cx` was previously unused — the whole cx2_gx table was
        # indexed, labeling rows/cols with every image name rather than
        # the names belonging to the given label indices.
        return [os.path.splitext(gname)[0] for gname in gx2_gname[cx2_gx[cx]]]

    col_label_gname = cx2_gname(allres.col_label_cx)
    row_label_gname = cx2_gname(allres.row_label_cx)
    timestamp = helpers.get_timestamp(format_='comment') + '\n'
    header = '\n'.join(
        ['# Result score matrix',
         '# Generated on: ' + timestamp,
         '# Format: rows separated by newlines, cols separated by commas',
         '# num_queries / rows = ' + repr(len(row_label_gname)),
         '# num_indexed / cols = ' + repr(len(col_label_gname)),
         '# row_labels = ' + repr(row_label_gname),
         '# col_labels = ' + repr(col_label_gname)])
    # Format each score row as comma separated fixed-width floats
    row_strings = []
    for row in allres.score_matrix:
        row_strings.append(', '.join('%5.2f' % x for x in row))
    body = '\n'.join(row_strings)
    matrix_str = '\n'.join([header, body])
    allres.matrix_str = matrix_str
def backup_csv_tables(hs, force_backup=False):
    """Copy the chip/name/image csv tables into a versioned backup dir.

    A backup is made only when the backup directory does not exist yet,
    unless force_backup is True. Each copied file gets a '_bak-<timestamp>'
    suffix inserted before its extension. Missing source files are skipped.
    """
    internal_dir = hs.dirs.internal_dir
    backup_dir = join(internal_dir, 'backup_v0.1.0')
    # Guard clause: nothing to do if a backup already exists and no force
    if exists(backup_dir) and not force_backup:
        return
    helpers.ensuredir(backup_dir)
    timestamp = helpers.get_timestamp(use_second=True)

    def do_backup(fname):
        # internal_dir/<name><ext> -> backup_dir/<name>_bak-<timestamp><ext>
        src = join(internal_dir, fname)
        name, ext = splitext(fname)
        dst = join(backup_dir, name + '_bak-' + timestamp + ext)
        if exists(src):
            shutil.copy(src, dst)

    for table_fname in (CHIP_TABLE_FNAME, NAME_TABLE_FNAME, IMAGE_TABLE_FNAME):
        do_backup(table_fname)
def __dump_text_report(allres, report_type='rankres_str'):
    """Write one of allres' report strings out as csv files.

    The report is written twice: once under a stable name in the result
    directory and once under a timestamped name in
    result_dir/timestamped_results.

    Args:
        allres: results aggregate; must expose the attribute named by
            `report_type`, plus `hs.dirs.result_dir` and `title_suffix`.
        report_type: name of the allres attribute holding the report text.
            Defaults to 'rankres_str'.
    """
    # FIX: the original guard `if not 'report_type' in vars():` was dead
    # code — a parameter is always bound in locals() — so the intended
    # fallback is now expressed as a default argument value.
    print('[rr2] Dumping textfile: ' + report_type)
    report_str = getattr(allres, report_type)
    # Get directories
    result_dir = allres.hs.dirs.result_dir
    timestamp_dir = join(result_dir, 'timestamped_results')
    helpers.ensurepath(timestamp_dir)
    helpers.ensurepath(result_dir)
    # Write to timestamp and result dir
    timestamp = helpers.get_timestamp()
    csv_timestamp_fname = report_type + allres.title_suffix + timestamp + '.csv'
    csv_timestamp_fpath = join(timestamp_dir, csv_timestamp_fname)
    csv_fname = report_type + allres.title_suffix + '.csv'
    csv_fpath = join(result_dir, csv_fname)
    helpers.write_to(csv_fpath, report_str)
    helpers.write_to(csv_timestamp_fpath, report_str)
def translate_python_to_cython(input_lines):
    """Translate annotated python source lines into cython source lines.

    Runs a small state machine over `input_lines`: `pyx_preprocess`
    classifies each line into a MODE (None / START_TAG / ELSE_TAG /
    DEFINE_TAG / CDEF_TAG — module-level tags defined elsewhere in this
    file) and may rewrite the line; `translate_cython_line` performs the
    actual token substitution using the accumulated PYX_MAP.

    Returns the translated list of output lines, headed by an
    autogeneration banner containing a timestamp ('???' if the helpers
    module could not be imported).
    """
    try:
        from hscom import helpers as util
        timestamp = util.get_timestamp()
    except Exception:
        # Best-effort timestamp: the banner still gets written
        timestamp = '???'
        pass
    output_lines = ['# THIS FILE WAS AUTOGENERATED ON %s\n' % timestamp]
    MODE = None          # current annotation state from pyx_preprocess
    PYX_MAP = {}         # substitution map built up by pyx_preprocess
    translate_nLines = 0  # how many upcoming lines still need translating
    for in_line in input_lines:
        MODE, in_line = pyx_preprocess(MODE, in_line, PYX_MAP)
        if MODE is None:
            # No cython annotations
            output_lines.append(in_line)
        elif MODE == START_TAG:
            # Start unannotating cython code;
            in_line = translate_cython_line(in_line, PYX_MAP)
            output_lines.append(in_line)
        elif MODE == ELSE_TAG:
            # Exclude from cython
            pass
        elif MODE == DEFINE_TAG:
            # Skip the python define
            # NOTE(review): a non-comment line containing '):' presumably
            # ends the def header — confirm against pyx_preprocess.
            if in_line.find('#') == -1 and in_line.find('):') != -1:
                MODE = None
                translate_nLines = 0
            # Translate the cython define
            elif in_line.find('#') == 0:
                translate_nLines = 1
        elif MODE == CDEF_TAG:
            # A cdef annotation: translate exactly the next line
            translate_nLines = 1
            MODE = None
        # We are translating the next N lines
        if translate_nLines > 0:
            translate_nLines -= 1
            in_line = translate_cython_line(in_line, PYX_MAP)
            output_lines.append(in_line)
    return output_lines
def build_rankres_str(allres):
    'Builds csv files showing the cxs/scores/ranks of the query results'
    hs = allres.hs
    cx2_cid = hs.tables.cx2_cid
    test_samp = hs.test_sample_cx
    train_samp = hs.train_sample_cx
    indx_samp = hs.indexed_sample_cx
    # Get organized data for csv file
    (qcx2_top_true_rank, qcx2_top_true_score, qcx2_top_true_cx) = allres.top_true_qcx_arrays
    (qcx2_bot_true_rank, qcx2_bot_true_score, qcx2_bot_true_cx) = allres.bot_true_qcx_arrays
    (qcx2_top_false_rank, qcx2_top_false_score, qcx2_top_false_cx) = allres.top_false_qcx_arrays
    # Number of groundtruth per query (-2 marks chips that are not queries)
    qcx2_numgt = np.zeros(len(cx2_cid)) - 2
    for qcx in test_samp:
        qcx2_numgt[qcx] = len(hs.get_other_indexed_cxs(qcx))
    # Easy to digest results
    num_chips = len(test_samp)
    num_nonquery = len(np.setdiff1d(indx_samp, test_samp))
    # Find the test samples WITH ground truth
    test_samp_with_gt = np.array(test_samp)[qcx2_numgt[test_samp] > 0]
    if len(test_samp_with_gt) == 0:
        warnings.warn('[rr2] there were no queries with ground truth')
    flag_cxs_fn = hs.flag_cxs_with_name_in_sample

    def ranks_less_than_(thresh, intrain=None):
        # Count queries whose top-true rank is <= thresh.
        # intrain=None -> all queries; True/False restricts to queries whose
        # name is (not) in the training sample.
        if len(test_samp_with_gt) == 0:
            test_cxs_ = np.array([])
        elif intrain is None:  # report all
            test_cxs_ = test_samp_with_gt
        else:  # report either or
            in_train_flag = flag_cxs_fn(test_samp_with_gt, train_samp)
            if intrain is False:
                # FIX: `True - in_train_flag` is a TypeError on modern numpy
                # (boolean subtract removed); logical_not is the equivalent.
                in_train_flag = np.logical_not(in_train_flag)
            test_cxs_ = test_samp_with_gt[in_train_flag]
        # number of test samples with ground truth
        num_with_gt = len(test_cxs_)
        if num_with_gt == 0:
            return [], ('NoGT', 'NoGT', -1, 'NoGT')
        # find tests with ranks greater and less than thresh
        testcx2_ttr = qcx2_top_true_rank[test_cxs_]
        greater_cxs = test_cxs_[np.where(testcx2_ttr > thresh)[0]]
        num_greater = len(greater_cxs)
        num_less = num_with_gt - num_greater
        # FIX: removed the no-op re-derivation `num_greater = num_with_gt - num_less`
        frac_less = 100.0 * num_less / num_with_gt
        fmt_tup = (num_less, num_with_gt, frac_less, num_greater)
        return greater_cxs, fmt_tup

    greater5_cxs, fmt5_tup = ranks_less_than_(5)
    greater1_cxs, fmt1_tup = ranks_less_than_(1)
    #
    gt5_intrain_cxs, fmt5_in_tup = ranks_less_than_(5, intrain=True)
    gt1_intrain_cxs, fmt1_in_tup = ranks_less_than_(1, intrain=True)
    #
    gt5_outtrain_cxs, fmt5_out_tup = ranks_less_than_(5, intrain=False)
    gt1_outtrain_cxs, fmt1_out_tup = ranks_less_than_(1, intrain=False)
    #
    allres.greater1_cxs = greater1_cxs
    allres.greater5_cxs = greater5_cxs
    # CSV Metadata
    header = '# Experiment allres.title_suffix = ' + allres.title_suffix + '\n'
    header += helpers.get_timestamp(format_='comment') + '\n'
    # Scalar summary
    scalar_summary = '# Num Query Chips: %d \n' % num_chips
    scalar_summary += '# Num Query Chips with at least one match: %d \n' % len(test_samp_with_gt)
    scalar_summary += '# Num NonQuery Chips: %d \n' % num_nonquery
    scalar_summary += '# Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (fmt5_tup)
    scalar_summary += '# Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (fmt1_tup)
    scalar_summary += '# InTrain Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (fmt5_in_tup)
    scalar_summary += '# InTrain Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (fmt1_in_tup)
    scalar_summary += '# OutTrain Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (fmt5_out_tup)
    scalar_summary += '# OutTrain Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (fmt1_out_tup)
    header += scalar_summary
    # More Metadata
    header += textwrap.dedent('''
    # Rank Result Metadata:
    # QCX = Query chip-index
    # QGNAME = Query images name
    # NUMGT = Num ground truth matches
    # TT = top true
    # BT = bottom true
    # TF = top false''').strip()
    # Build the CSV table
    test_sample_gx = hs.tables.cx2_gx[test_samp]
    test_sample_gname = hs.tables.gx2_gname[test_sample_gx]
    test_sample_gname = [g.replace('.jpg', '') for g in test_sample_gname]
    column_labels = ['QCX', 'NUM GT',
                     'TT CX', 'BT CX', 'TF CX',
                     'TT SCORE', 'BT SCORE', 'TF SCORE',
                     'TT RANK', 'BT RANK', 'TF RANK',
                     'QGNAME', ]
    column_list = [
        test_samp, qcx2_numgt[test_samp],
        qcx2_top_true_cx[test_samp], qcx2_bot_true_cx[test_samp],
        qcx2_top_false_cx[test_samp], qcx2_top_true_score[test_samp],
        qcx2_bot_true_score[test_samp], qcx2_top_false_score[test_samp],
        qcx2_top_true_rank[test_samp], qcx2_bot_true_rank[test_samp],
        qcx2_top_false_rank[test_samp], test_sample_gname, ]
    column_type = [int, int, int, int, int,
                   float, float, float, int, int, int, str, ]
    rankres_str = ld2.make_csv_table(column_labels, column_list, header, column_type)
    # Put some more data at the end
    # FIX: materialize the zips — on Python 3 repr() of a bare zip object
    # prints '<zip object ...>' and the iterator would be exhausted on reuse.
    problem_true_pairs = list(zip(allres.problem_true.qcxs, allres.problem_true.cxs))
    problem_false_pairs = list(zip(allres.problem_false.qcxs, allres.problem_false.cxs))
    problem_str = '\n'.join([
        '#Problem Cases: ',
        '# problem_true_pairs = ' + repr(problem_true_pairs),
        '# problem_false_pairs = ' + repr(problem_false_pairs)])
    rankres_str += '\n' + problem_str
    # Attach results to allres structure
    # FIX: removed the duplicated problem_*_pairs assignments
    allres.rankres_str = rankres_str
    allres.scalar_summary = scalar_summary
    allres.problem_false_pairs = problem_false_pairs
    allres.problem_true_pairs = problem_true_pairs
def build_rankres_str(allres):
    'Builds csv files showing the cxs/scores/ranks of the query results'
    hs = allres.hs
    cx2_cid = hs.tables.cx2_cid
    test_samp = hs.test_sample_cx
    train_samp = hs.train_sample_cx
    indx_samp = hs.indexed_sample_cx
    # Get organized data for csv file
    (qcx2_top_true_rank, qcx2_top_true_score, qcx2_top_true_cx) = allres.top_true_qcx_arrays
    (qcx2_bot_true_rank, qcx2_bot_true_score, qcx2_bot_true_cx) = allres.bot_true_qcx_arrays
    (qcx2_top_false_rank, qcx2_top_false_score, qcx2_top_false_cx) = allres.top_false_qcx_arrays
    # Number of groundtruth per query (-2 marks chips that are not queries)
    qcx2_numgt = np.zeros(len(cx2_cid)) - 2
    for qcx in test_samp:
        qcx2_numgt[qcx] = len(hs.get_other_indexed_cxs(qcx))
    # Easy to digest results
    num_chips = len(test_samp)
    num_nonquery = len(np.setdiff1d(indx_samp, test_samp))
    # Find the test samples WITH ground truth
    test_samp_with_gt = np.array(test_samp)[qcx2_numgt[test_samp] > 0]
    if len(test_samp_with_gt) == 0:
        warnings.warn('[rr2] there were no queries with ground truth')
    flag_cxs_fn = hs.flag_cxs_with_name_in_sample

    def ranks_less_than_(thresh, intrain=None):
        # Count queries whose top-true rank is <= thresh, optionally
        # restricted to queries whose name is (not) in the training sample.
        if len(test_samp_with_gt) == 0:
            test_cxs_ = np.array([])
        elif intrain is None:  # report all
            test_cxs_ = test_samp_with_gt
        else:  # report either or
            in_train_flag = flag_cxs_fn(test_samp_with_gt, train_samp)
            if intrain is False:
                # FIX: `True - in_train_flag` raises TypeError on modern
                # numpy (boolean `-` removed); use logical_not instead.
                in_train_flag = np.logical_not(in_train_flag)
            test_cxs_ = test_samp_with_gt[in_train_flag]
        # number of test samples with ground truth
        num_with_gt = len(test_cxs_)
        if num_with_gt == 0:
            return [], ('NoGT', 'NoGT', -1, 'NoGT')
        # find tests with ranks greater and less than thresh
        testcx2_ttr = qcx2_top_true_rank[test_cxs_]
        greater_cxs = test_cxs_[np.where(testcx2_ttr > thresh)[0]]
        num_greater = len(greater_cxs)
        num_less = num_with_gt - num_greater
        # FIX: dropped the no-op `num_greater = num_with_gt - num_less`
        frac_less = 100.0 * num_less / num_with_gt
        fmt_tup = (num_less, num_with_gt, frac_less, num_greater)
        return greater_cxs, fmt_tup

    greater5_cxs, fmt5_tup = ranks_less_than_(5)
    greater1_cxs, fmt1_tup = ranks_less_than_(1)
    #
    gt5_intrain_cxs, fmt5_in_tup = ranks_less_than_(5, intrain=True)
    gt1_intrain_cxs, fmt1_in_tup = ranks_less_than_(1, intrain=True)
    #
    gt5_outtrain_cxs, fmt5_out_tup = ranks_less_than_(5, intrain=False)
    gt1_outtrain_cxs, fmt1_out_tup = ranks_less_than_(1, intrain=False)
    #
    allres.greater1_cxs = greater1_cxs
    allres.greater5_cxs = greater5_cxs
    # CSV Metadata
    header = '# Experiment allres.title_suffix = ' + allres.title_suffix + '\n'
    header += helpers.get_timestamp(format_='comment') + '\n'
    # Scalar summary
    scalar_summary = '# Num Query Chips: %d \n' % num_chips
    scalar_summary += '# Num Query Chips with at least one match: %d \n' % len(
        test_samp_with_gt)
    scalar_summary += '# Num NonQuery Chips: %d \n' % num_nonquery
    scalar_summary += '# Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (fmt5_tup)
    scalar_summary += '# Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (
        fmt1_tup)
    scalar_summary += '# InTrain Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (
        fmt5_in_tup)
    scalar_summary += '# InTrain Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (
        fmt1_in_tup)
    scalar_summary += '# OutTrain Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (
        fmt5_out_tup)
    scalar_summary += '# OutTrain Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (
        fmt1_out_tup)
    header += scalar_summary
    # More Metadata
    header += textwrap.dedent('''
    # Rank Result Metadata:
    # QCX = Query chip-index
    # QGNAME = Query images name
    # NUMGT = Num ground truth matches
    # TT = top true
    # BT = bottom true
    # TF = top false''').strip()
    # Build the CSV table
    test_sample_gx = hs.tables.cx2_gx[test_samp]
    test_sample_gname = hs.tables.gx2_gname[test_sample_gx]
    test_sample_gname = [g.replace('.jpg', '') for g in test_sample_gname]
    column_labels = [
        'QCX', 'NUM GT',
        'TT CX', 'BT CX', 'TF CX',
        'TT SCORE', 'BT SCORE', 'TF SCORE',
        'TT RANK', 'BT RANK', 'TF RANK',
        'QGNAME', ]
    column_list = [
        test_samp, qcx2_numgt[test_samp],
        qcx2_top_true_cx[test_samp], qcx2_bot_true_cx[test_samp],
        qcx2_top_false_cx[test_samp], qcx2_top_true_score[test_samp],
        qcx2_bot_true_score[test_samp], qcx2_top_false_score[test_samp],
        qcx2_top_true_rank[test_samp], qcx2_bot_true_rank[test_samp],
        qcx2_top_false_rank[test_samp], test_sample_gname, ]
    column_type = [
        int, int, int, int, int,
        float, float, float, int, int, int, str, ]
    rankres_str = ld2.make_csv_table(column_labels, column_list, header, column_type)
    # Put some more data at the end
    # FIX: materialize the zips — on Python 3 repr() of a zip object prints
    # '<zip object ...>' and the iterator would be exhausted on reuse.
    problem_true_pairs = list(zip(allres.problem_true.qcxs, allres.problem_true.cxs))
    problem_false_pairs = list(zip(allres.problem_false.qcxs, allres.problem_false.cxs))
    problem_str = '\n'.join([
        '#Problem Cases: ',
        '# problem_true_pairs = ' + repr(problem_true_pairs),
        '# problem_false_pairs = ' + repr(problem_false_pairs)
    ])
    rankres_str += '\n' + problem_str
    # Attach results to allres structure
    # FIX: removed the duplicated problem_*_pairs assignments
    allres.rankres_str = rankres_str
    allres.scalar_summary = scalar_summary
    allres.problem_false_pairs = problem_false_pairs
    allres.problem_true_pairs = problem_true_pairs