Esempio n. 1
0
def build_matrix_str(allres):
    """Render ``allres.score_matrix`` as commented CSV text.

    The text is a ``#``-prefixed header (title, timestamp, format note,
    row/column counts and row/column labels) followed by one line per matrix
    row, each score formatted with ``%5.2f`` and joined by ``', '``.

    Args:
        allres: results object providing ``hs.tables.cx2_gx``,
            ``hs.tables.gx2_gname``, ``row_label_cx``, ``col_label_cx`` and
            ``score_matrix``.

    Returns:
        None. The text is stored on ``allres.matrix_str``.
    """
    hs = allres.hs
    cx2_gx = hs.tables.cx2_gx
    gx2_gname = hs.tables.gx2_gname

    def cx2_gname(cx):
        # Look up image names only for the requested chip indexes. The
        # argument used to be ignored (the whole cx2_gx table was used as
        # the index), so row and column labels both listed every chip.
        # NOTE(review): assumes cx is an index list/array, as test_samp is
        # when indexing cx2_gx elsewhere in this file -- confirm.
        return [os.path.splitext(gname)[0] for gname in gx2_gname[cx2_gx[cx]]]

    col_label_gname = cx2_gname(allres.col_label_cx)
    row_label_gname = cx2_gname(allres.row_label_cx)
    # The trailing newline is kept so the emitted header text is unchanged
    timestamp = helpers.get_timestamp(format_='comment') + '\n'
    header = '\n'.join([
        '# Result score matrix',
        '# Generated on: ' + timestamp,
        '# Format: rows separated by newlines, cols separated by commas',
        '# num_queries  / rows = ' + repr(len(row_label_gname)),
        '# num_indexed  / cols = ' + repr(len(col_label_gname)),
        '# row_labels = ' + repr(row_label_gname),
        '# col_labels = ' + repr(col_label_gname),
    ])
    body = '\n'.join(
        ', '.join('%5.2f' % score for score in row)
        for row in allres.score_matrix
    )
    allres.matrix_str = '\n'.join([header, body])
Esempio n. 2
0
def build_matrix_str(allres):
    """Render ``allres.score_matrix`` as commented CSV text.

    The text is a ``#``-prefixed header (title, timestamp, format note,
    row/column counts and row/column labels) followed by one line per matrix
    row, each score formatted with ``%5.2f`` and joined by ``', '``.

    Args:
        allres: results object providing ``hs.tables.cx2_gx``,
            ``hs.tables.gx2_gname``, ``row_label_cx``, ``col_label_cx`` and
            ``score_matrix``.

    Returns:
        None. The text is stored on ``allres.matrix_str``.
    """
    hs = allres.hs
    cx2_gx = hs.tables.cx2_gx
    gx2_gname = hs.tables.gx2_gname

    def cx2_gname(cx):
        # Look up image names only for the requested chip indexes. The
        # argument used to be ignored (the whole cx2_gx table was used as
        # the index), so row and column labels both listed every chip.
        # NOTE(review): assumes cx is an index list/array, as test_samp is
        # when indexing cx2_gx elsewhere in this file -- confirm.
        return [os.path.splitext(gname)[0] for gname in gx2_gname[cx2_gx[cx]]]

    col_label_gname = cx2_gname(allres.col_label_cx)
    row_label_gname = cx2_gname(allres.row_label_cx)
    # The trailing newline is kept so the emitted header text is unchanged
    timestamp = helpers.get_timestamp(format_='comment') + '\n'
    header = '\n'.join([
        '# Result score matrix',
        '# Generated on: ' + timestamp,
        '# Format: rows separated by newlines, cols separated by commas',
        '# num_queries  / rows = ' + repr(len(row_label_gname)),
        '# num_indexed  / cols = ' + repr(len(col_label_gname)),
        '# row_labels = ' + repr(row_label_gname),
        '# col_labels = ' + repr(col_label_gname),
    ])
    body = '\n'.join(
        ', '.join('%5.2f' % score for score in row)
        for row in allres.score_matrix
    )
    allres.matrix_str = '\n'.join([header, body])
Esempio n. 3
0
def backup_csv_tables(hs, force_backup=False):
    """Copy the chip, name and image table files into a backup directory.

    The backup directory is ``backup_v0.1.0`` inside ``hs.dirs.internal_dir``.
    Nothing happens when that directory already exists, unless
    ``force_backup`` is true. Each copy is named
    ``<root>_bak-<timestamp><ext>``; tables whose source file does not exist
    are skipped.
    """
    internal_dir = hs.dirs.internal_dir
    backup_dir = join(internal_dir, 'backup_v0.1.0')
    if exists(backup_dir) and not force_backup:
        # A backup directory is already present and no refresh was requested
        return
    helpers.ensuredir(backup_dir)
    stamp = helpers.get_timestamp(use_second=True)
    bak_fmt = '%s_bak-' + stamp + '%s'
    for table_fname in (CHIP_TABLE_FNAME, NAME_TABLE_FNAME, IMAGE_TABLE_FNAME):
        src_fpath = join(internal_dir, table_fname)
        dst_fpath = join(backup_dir, bak_fmt % splitext(table_fname))
        if exists(src_fpath):
            shutil.copy(src_fpath, dst_fpath)
Esempio n. 4
0
def backup_csv_tables(hs, force_backup=False):
    """Copy the chip, name and image table files into a backup directory.

    The backup directory is ``backup_v0.1.0`` inside ``hs.dirs.internal_dir``.
    Nothing happens when that directory already exists, unless
    ``force_backup`` is true. Each copy is named
    ``<root>_bak-<timestamp><ext>``; tables whose source file does not exist
    are skipped.
    """
    internal_dir = hs.dirs.internal_dir
    backup_dir = join(internal_dir, 'backup_v0.1.0')
    if exists(backup_dir) and not force_backup:
        # A backup directory is already present and no refresh was requested
        return
    helpers.ensuredir(backup_dir)
    stamp = helpers.get_timestamp(use_second=True)
    bak_fmt = '%s_bak-' + stamp + '%s'
    for table_fname in (CHIP_TABLE_FNAME, NAME_TABLE_FNAME, IMAGE_TABLE_FNAME):
        src_fpath = join(internal_dir, table_fname)
        dst_fpath = join(backup_dir, bak_fmt % splitext(table_fname))
        if exists(src_fpath):
            shutil.copy(src_fpath, dst_fpath)
Esempio n. 5
0
def __dump_text_report(allres, report_type='rankres_str'):
    """Write one of the report strings stored on ``allres`` to disk as CSV.

    The same text is written twice: to
    ``<result_dir>/<report_type><title_suffix>.csv`` and to a timestamped
    copy under ``<result_dir>/timestamped_results``.

    Args:
        allres: results object holding the report text in its instance
            ``__dict__`` under ``report_type``, plus ``title_suffix`` and
            ``hs.dirs.result_dir``.
        report_type: name of the report attribute to dump. Defaults to
            ``'rankres_str'``. The old ``'report_type' in vars()`` check
            could never be false for a parameter, so this default was
            unreachable; it is now a real default argument.

    Raises:
        KeyError: if ``report_type`` is not an instance attribute of
            ``allres``.
    """
    print('[rr2] Dumping textfile: ' + report_type)
    report_str = allres.__dict__[report_type]
    # Get directories
    result_dir = allres.hs.dirs.result_dir
    timestamp_dir = join(result_dir, 'timestamped_results')
    helpers.ensurepath(timestamp_dir)
    helpers.ensurepath(result_dir)
    # Write to timestamp and result dir
    timestamp = helpers.get_timestamp()
    csv_timestamp_fname = report_type + allres.title_suffix + timestamp + '.csv'
    csv_timestamp_fpath = join(timestamp_dir, csv_timestamp_fname)
    csv_fname = report_type + allres.title_suffix + '.csv'
    csv_fpath = join(result_dir, csv_fname)
    helpers.write_to(csv_fpath, report_str)
    helpers.write_to(csv_timestamp_fpath, report_str)
Esempio n. 6
0
def __dump_text_report(allres, report_type='rankres_str'):
    """Write one of the report strings stored on ``allres`` to disk as CSV.

    The same text is written twice: to
    ``<result_dir>/<report_type><title_suffix>.csv`` and to a timestamped
    copy under ``<result_dir>/timestamped_results``.

    Args:
        allres: results object holding the report text in its instance
            ``__dict__`` under ``report_type``, plus ``title_suffix`` and
            ``hs.dirs.result_dir``.
        report_type: name of the report attribute to dump. Defaults to
            ``'rankres_str'``. The old ``'report_type' in vars()`` check
            could never be false for a parameter, so this default was
            unreachable; it is now a real default argument.

    Raises:
        KeyError: if ``report_type`` is not an instance attribute of
            ``allres``.
    """
    print('[rr2] Dumping textfile: ' + report_type)
    report_str = allres.__dict__[report_type]
    # Get directories
    result_dir = allres.hs.dirs.result_dir
    timestamp_dir = join(result_dir, 'timestamped_results')
    helpers.ensurepath(timestamp_dir)
    helpers.ensurepath(result_dir)
    # Write to timestamp and result dir
    timestamp = helpers.get_timestamp()
    csv_timestamp_fname = report_type + allres.title_suffix + timestamp + '.csv'
    csv_timestamp_fpath = join(timestamp_dir, csv_timestamp_fname)
    csv_fname = report_type + allres.title_suffix + '.csv'
    csv_fpath = join(result_dir, csv_fname)
    helpers.write_to(csv_fpath, report_str)
    helpers.write_to(csv_timestamp_fpath, report_str)
Esempio n. 7
0
def translate_python_to_cython(input_lines):
    """Convert annotated python source lines into cython source lines.

    Every input line is passed through ``pyx_preprocess``, which returns the
    current mode and the (possibly rewritten) line. Depending on the mode the
    line is copied verbatim, translated with ``translate_cython_line``, or
    dropped.

    Args:
        input_lines: iterable of source lines.

    Returns:
        list of output lines; the first entry is an "autogenerated" banner
        carrying a timestamp (``'???'`` when the timestamp helper is
        unavailable).
    """
    try:
        from hscom import helpers as util
        stamp = util.get_timestamp()
    except Exception:
        # Best effort only: fall back to a placeholder timestamp
        stamp = '???'
    translated = ['# THIS FILE WAS AUTOGENERATED ON %s\n' % stamp]
    mode = None
    pyx_map = {}
    num_pending = 0  # how many lines (including this one) still to translate
    for raw_line in input_lines:
        mode, line = pyx_preprocess(mode, raw_line, pyx_map)
        if mode is None:
            # Line carries no cython annotation: copy it through
            translated.append(line)
        elif mode == START_TAG:
            # Inside an annotated cython section: translate and emit
            line = translate_cython_line(line, pyx_map)
            translated.append(line)
        elif mode == ELSE_TAG:
            # Section excluded from the cython output
            pass
        elif mode == DEFINE_TAG:
            if '#' not in line and '):' in line:
                # End of the plain-python define: skip it and leave the mode
                mode = None
                num_pending = 0
            elif line.startswith('#'):
                # Commented line holds the cython define: translate it below
                num_pending = 1
        elif mode == CDEF_TAG:
            num_pending = 1
            mode = None
        # Emit a translated copy of the line when one was requested above
        if num_pending > 0:
            num_pending -= 1
            line = translate_cython_line(line, pyx_map)
            translated.append(line)
    return translated
Esempio n. 8
0
def build_rankres_str(allres):
    """Build the rank-result CSV report and attach it to ``allres``.

    Reads the sample index lists and lookup tables from ``allres.hs`` and the
    top-true / bottom-true / top-false arrays from ``allres``, then formats
    one row per test chip through ``ld2.make_csv_table``.

    Attributes set on ``allres``:
        greater1_cxs, greater5_cxs: test chips (with ground truth) whose top
            true rank is greater than 1 / 5; ``[]`` when no chip qualifies
            for counting.
        scalar_summary: the ``#``-prefixed summary lines.
        rankres_str: the CSV text followed by the problem-case lines.
        problem_true_pairs, problem_false_pairs: lists of (qcx, cx) pairs.

    Returns:
        None.
    """
    hs = allres.hs
    cx2_cid = hs.tables.cx2_cid
    test_samp = hs.test_sample_cx
    train_samp = hs.train_sample_cx
    indx_samp = hs.indexed_sample_cx
    # Get organized data for csv file
    (qcx2_top_true_rank,
     qcx2_top_true_score,
     qcx2_top_true_cx) = allres.top_true_qcx_arrays

    (qcx2_bot_true_rank,
     qcx2_bot_true_score,
     qcx2_bot_true_cx) = allres.bot_true_qcx_arrays

    (qcx2_top_false_rank,
     qcx2_top_false_score,
     qcx2_top_false_cx) = allres.top_false_qcx_arrays
    # Number of groundtruth per query; chips that are not queried keep -2
    qcx2_numgt = np.zeros(len(cx2_cid)) - 2
    for qcx in test_samp:
        qcx2_numgt[qcx] = len(hs.get_other_indexed_cxs(qcx))
    # Easy to digest results
    num_chips = len(test_samp)
    num_nonquery = len(np.setdiff1d(indx_samp, test_samp))
    # Find the test samples WITH ground truth
    test_samp_with_gt = np.array(test_samp)[qcx2_numgt[test_samp] > 0]
    if len(test_samp_with_gt) == 0:
        warnings.warn('[rr2] there were no queries with ground truth')
    flag_cxs_fn = hs.flag_cxs_with_name_in_sample

    def ranks_less_than_(thresh, intrain=None):
        """Split test chips by whether their top true rank exceeds thresh.

        intrain=None considers every test chip with ground truth; otherwise
        only the chips flagged by ``flag_cxs_fn`` against ``train_samp`` are
        kept, or the unflagged ones when ``intrain is False``.

        Returns ``(greater_cxs, fmt_tup)`` where ``fmt_tup`` is
        ``(num_less, num_with_gt, frac_less, num_greater)``, or
        ``([], ('NoGT', 'NoGT', -1, 'NoGT'))`` when no chip qualifies.
        """
        if len(test_samp_with_gt) == 0:
            test_cxs_ = np.array([])
        elif intrain is None:  # report all
            test_cxs_ = test_samp_with_gt
        else:  # report either or
            in_train_flag = flag_cxs_fn(test_samp_with_gt, train_samp)
            if intrain is False:
                # ``True - flags`` raises TypeError on NumPy boolean arrays;
                # logical_not is the supported way to invert the mask.
                in_train_flag = np.logical_not(in_train_flag)
            test_cxs_ = test_samp_with_gt[in_train_flag]
        # number of test samples with ground truth
        num_with_gt = len(test_cxs_)
        if num_with_gt == 0:
            return [], ('NoGT', 'NoGT', -1, 'NoGT')
        # find tests with ranks greater and less than thresh
        testcx2_ttr = qcx2_top_true_rank[test_cxs_]
        greater_cxs = test_cxs_[np.where(testcx2_ttr > thresh)[0]]
        num_greater = len(greater_cxs)
        num_less = num_with_gt - num_greater
        frac_less = 100.0 * num_less / num_with_gt
        fmt_tup = (num_less, num_with_gt, frac_less, num_greater)
        return greater_cxs, fmt_tup

    greater5_cxs, fmt5_tup = ranks_less_than_(5)
    greater1_cxs, fmt1_tup = ranks_less_than_(1)
    # Only the formatted counts are needed for the train / non-train splits
    _, fmt5_in_tup = ranks_less_than_(5, intrain=True)
    _, fmt1_in_tup = ranks_less_than_(1, intrain=True)
    _, fmt5_out_tup = ranks_less_than_(5, intrain=False)
    _, fmt1_out_tup = ranks_less_than_(1, intrain=False)
    #
    allres.greater1_cxs = greater1_cxs
    allres.greater5_cxs = greater5_cxs
    # CSV Metadata
    header = '# Experiment allres.title_suffix = ' + allres.title_suffix + '\n'
    header += helpers.get_timestamp(format_='comment') + '\n'
    # Scalar summary
    scalar_summary = '# Num Query Chips: %d \n' % num_chips
    scalar_summary += '# Num Query Chips with at least one match: %d \n' % len(test_samp_with_gt)
    scalar_summary += '# Num NonQuery Chips: %d \n' % num_nonquery
    scalar_summary += '# Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (fmt5_tup)
    scalar_summary += '# Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (fmt1_tup)

    scalar_summary += '# InTrain Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (fmt5_in_tup)
    scalar_summary += '# InTrain Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (fmt1_in_tup)

    scalar_summary += '# OutTrain Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (fmt5_out_tup)
    scalar_summary += '# OutTrain Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (fmt1_out_tup)
    header += scalar_summary
    # More Metadata
    header += textwrap.dedent('''
    # Rank Result Metadata:
    #   QCX  = Query chip-index
    # QGNAME = Query images name
    # NUMGT  = Num ground truth matches
    #    TT  = top true
    #    BT  = bottom true
    #    TF  = top false''').strip()
    # Build the CSV table
    test_sample_gx = hs.tables.cx2_gx[test_samp]
    test_sample_gname = hs.tables.gx2_gname[test_sample_gx]
    test_sample_gname = [g.replace('.jpg', '') for g in test_sample_gname]
    column_labels = ['QCX', 'NUM GT',
                     'TT CX', 'BT CX', 'TF CX',
                     'TT SCORE', 'BT SCORE', 'TF SCORE',
                     'TT RANK', 'BT RANK', 'TF RANK',
                     'QGNAME', ]
    column_list = [
        test_samp, qcx2_numgt[test_samp],
        qcx2_top_true_cx[test_samp], qcx2_bot_true_cx[test_samp],
        qcx2_top_false_cx[test_samp], qcx2_top_true_score[test_samp],
        qcx2_bot_true_score[test_samp], qcx2_top_false_score[test_samp],
        qcx2_top_true_rank[test_samp], qcx2_bot_true_rank[test_samp],
        qcx2_top_false_rank[test_samp], test_sample_gname, ]
    column_type = [int, int, int, int, int,
                   float, float, float, int, int, int, str, ]
    rankres_str = ld2.make_csv_table(column_labels, column_list, header, column_type)
    # Put some more data at the end. The pairs are materialised as lists so
    # repr() shows them (a bare zip object prints as '<zip object ...>' on
    # Python 3) and so the stored attributes can be iterated more than once.
    problem_true_pairs = list(zip(allres.problem_true.qcxs, allres.problem_true.cxs))
    problem_false_pairs = list(zip(allres.problem_false.qcxs, allres.problem_false.cxs))
    problem_str = '\n'.join([
        '#Problem Cases: ',
        '# problem_true_pairs = ' + repr(problem_true_pairs),
        '# problem_false_pairs = ' + repr(problem_false_pairs)])
    rankres_str += '\n' + problem_str
    # Attach results to allres structure
    allres.rankres_str = rankres_str
    allres.scalar_summary = scalar_summary
    allres.problem_false_pairs = problem_false_pairs
    allres.problem_true_pairs = problem_true_pairs
Esempio n. 9
0
def build_rankres_str(allres):
    """Build the rank-result CSV report and attach it to ``allres``.

    Reads the sample index lists and lookup tables from ``allres.hs`` and the
    top-true / bottom-true / top-false arrays from ``allres``, then formats
    one row per test chip through ``ld2.make_csv_table``.

    Attributes set on ``allres``:
        greater1_cxs, greater5_cxs: test chips (with ground truth) whose top
            true rank is greater than 1 / 5; ``[]`` when no chip qualifies
            for counting.
        scalar_summary: the ``#``-prefixed summary lines.
        rankres_str: the CSV text followed by the problem-case lines.
        problem_true_pairs, problem_false_pairs: lists of (qcx, cx) pairs.

    Returns:
        None.
    """
    hs = allres.hs
    cx2_cid = hs.tables.cx2_cid
    test_samp = hs.test_sample_cx
    train_samp = hs.train_sample_cx
    indx_samp = hs.indexed_sample_cx
    # Get organized data for csv file
    (qcx2_top_true_rank, qcx2_top_true_score,
     qcx2_top_true_cx) = allres.top_true_qcx_arrays

    (qcx2_bot_true_rank, qcx2_bot_true_score,
     qcx2_bot_true_cx) = allres.bot_true_qcx_arrays

    (qcx2_top_false_rank, qcx2_top_false_score,
     qcx2_top_false_cx) = allres.top_false_qcx_arrays
    # Number of groundtruth per query; chips that are not queried keep -2
    qcx2_numgt = np.zeros(len(cx2_cid)) - 2
    for qcx in test_samp:
        qcx2_numgt[qcx] = len(hs.get_other_indexed_cxs(qcx))
    # Easy to digest results
    num_chips = len(test_samp)
    num_nonquery = len(np.setdiff1d(indx_samp, test_samp))
    # Find the test samples WITH ground truth
    test_samp_with_gt = np.array(test_samp)[qcx2_numgt[test_samp] > 0]
    if len(test_samp_with_gt) == 0:
        warnings.warn('[rr2] there were no queries with ground truth')
    flag_cxs_fn = hs.flag_cxs_with_name_in_sample

    def ranks_less_than_(thresh, intrain=None):
        """Split test chips by whether their top true rank exceeds thresh.

        intrain=None considers every test chip with ground truth; otherwise
        only the chips flagged by ``flag_cxs_fn`` against ``train_samp`` are
        kept, or the unflagged ones when ``intrain is False``.

        Returns ``(greater_cxs, fmt_tup)`` where ``fmt_tup`` is
        ``(num_less, num_with_gt, frac_less, num_greater)``, or
        ``([], ('NoGT', 'NoGT', -1, 'NoGT'))`` when no chip qualifies.
        """
        if len(test_samp_with_gt) == 0:
            test_cxs_ = np.array([])
        elif intrain is None:  # report all
            test_cxs_ = test_samp_with_gt
        else:  # report either or
            in_train_flag = flag_cxs_fn(test_samp_with_gt, train_samp)
            if intrain is False:
                # ``True - flags`` raises TypeError on NumPy boolean arrays;
                # logical_not is the supported way to invert the mask.
                in_train_flag = np.logical_not(in_train_flag)
            test_cxs_ = test_samp_with_gt[in_train_flag]
        # number of test samples with ground truth
        num_with_gt = len(test_cxs_)
        if num_with_gt == 0:
            return [], ('NoGT', 'NoGT', -1, 'NoGT')
        # find tests with ranks greater and less than thresh
        testcx2_ttr = qcx2_top_true_rank[test_cxs_]
        greater_cxs = test_cxs_[np.where(testcx2_ttr > thresh)[0]]
        num_greater = len(greater_cxs)
        num_less = num_with_gt - num_greater
        frac_less = 100.0 * num_less / num_with_gt
        fmt_tup = (num_less, num_with_gt, frac_less, num_greater)
        return greater_cxs, fmt_tup

    greater5_cxs, fmt5_tup = ranks_less_than_(5)
    greater1_cxs, fmt1_tup = ranks_less_than_(1)
    # Only the formatted counts are needed for the train / non-train splits
    _, fmt5_in_tup = ranks_less_than_(5, intrain=True)
    _, fmt1_in_tup = ranks_less_than_(1, intrain=True)
    _, fmt5_out_tup = ranks_less_than_(5, intrain=False)
    _, fmt1_out_tup = ranks_less_than_(1, intrain=False)
    #
    allres.greater1_cxs = greater1_cxs
    allres.greater5_cxs = greater5_cxs
    # CSV Metadata
    header = '# Experiment allres.title_suffix = ' + allres.title_suffix + '\n'
    header += helpers.get_timestamp(format_='comment') + '\n'
    # Scalar summary
    scalar_summary = '# Num Query Chips: %d \n' % num_chips
    scalar_summary += '# Num Query Chips with at least one match: %d \n' % len(
        test_samp_with_gt)
    scalar_summary += '# Num NonQuery Chips: %d \n' % num_nonquery
    scalar_summary += '# Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (fmt5_tup)
    scalar_summary += '# Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (
        fmt1_tup)

    scalar_summary += '# InTrain Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (
        fmt5_in_tup)
    scalar_summary += '# InTrain Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (
        fmt1_in_tup)

    scalar_summary += '# OutTrain Ranks <= 5: %r/%r = %.1f%% (missed %r)\n' % (
        fmt5_out_tup)
    scalar_summary += '# OutTrain Ranks <= 1: %r/%r = %.1f%% (missed %r)\n\n' % (
        fmt1_out_tup)
    header += scalar_summary
    # More Metadata
    header += textwrap.dedent('''
    # Rank Result Metadata:
    #   QCX  = Query chip-index
    # QGNAME = Query images name
    # NUMGT  = Num ground truth matches
    #    TT  = top true
    #    BT  = bottom true
    #    TF  = top false''').strip()
    # Build the CSV table
    test_sample_gx = hs.tables.cx2_gx[test_samp]
    test_sample_gname = hs.tables.gx2_gname[test_sample_gx]
    test_sample_gname = [g.replace('.jpg', '') for g in test_sample_gname]
    column_labels = [
        'QCX',
        'NUM GT',
        'TT CX',
        'BT CX',
        'TF CX',
        'TT SCORE',
        'BT SCORE',
        'TF SCORE',
        'TT RANK',
        'BT RANK',
        'TF RANK',
        'QGNAME',
    ]
    column_list = [
        test_samp,
        qcx2_numgt[test_samp],
        qcx2_top_true_cx[test_samp],
        qcx2_bot_true_cx[test_samp],
        qcx2_top_false_cx[test_samp],
        qcx2_top_true_score[test_samp],
        qcx2_bot_true_score[test_samp],
        qcx2_top_false_score[test_samp],
        qcx2_top_true_rank[test_samp],
        qcx2_bot_true_rank[test_samp],
        qcx2_top_false_rank[test_samp],
        test_sample_gname,
    ]
    column_type = [
        int,
        int,
        int,
        int,
        int,
        float,
        float,
        float,
        int,
        int,
        int,
        str,
    ]
    rankres_str = ld2.make_csv_table(column_labels, column_list, header,
                                     column_type)
    # Put some more data at the end. The pairs are materialised as lists so
    # repr() shows them (a bare zip object prints as '<zip object ...>' on
    # Python 3) and so the stored attributes can be iterated more than once.
    problem_true_pairs = list(
        zip(allres.problem_true.qcxs, allres.problem_true.cxs))
    problem_false_pairs = list(
        zip(allres.problem_false.qcxs, allres.problem_false.cxs))
    problem_str = '\n'.join([
        '#Problem Cases: ',
        '# problem_true_pairs = ' + repr(problem_true_pairs),
        '# problem_false_pairs = ' + repr(problem_false_pairs)
    ])
    rankres_str += '\n' + problem_str
    # Attach results to allres structure
    allres.rankres_str = rankres_str
    allres.scalar_summary = scalar_summary
    allres.problem_false_pairs = problem_false_pairs
    allres.problem_true_pairs = problem_true_pairs