Example 1
def sequential_feat_load(feat_cfg, feat_fpath_list):
    kpts_list = []
    desc_list = []
    # Debug loading (seems to use lots of memory)
    print('\n')
    try:
        nFeats = len(feat_fpath_list)
        prog_label = '[fc2] Loading feature: '
        mark_progress, end_progress = helpers.progress_func(nFeats, prog_label)
        for count, feat_path in enumerate(feat_fpath_list):
            try:
                npz = np.load(feat_path, mmap_mode=None)
            except IOError:
                print('\n')
                helpers.checkpath(feat_path, verbose=True)
                print('IOError on feat_path=%r' % feat_path)
                raise
            kpts = npz['arr_0']
            desc = npz['arr_1']
            npz.close()
            kpts_list.append(kpts)
            desc_list.append(desc)
            mark_progress(count)
        end_progress()
        print('[fc2] Finished load of individual kpts and desc')
    except MemoryError:
        print('\n------------')
        print('[fc2] Out of memory')
        print('[fc2] Trying to read: %r' % feat_path)
        print('[fc2] len(kpts_list) = %d' % len(kpts_list))
        print('[fc2] len(desc_list) = %d' % len(desc_list))
        raise
    if feat_cfg.whiten:
        desc_list = whiten_features(desc_list)
    return kpts_list, desc_list
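For reference, a hedged sketch of the writer side this loader assumes: np.savez with positional arguments stores arrays under the keys 'arr_0' and 'arr_1', which is exactly how kpts and desc are recovered above. The filename and feature values below are illustrative placeholders.

# Sketch (assumption): writing feature files that sequential_feat_load
# can read back. np.savez names positional arrays 'arr_0', 'arr_1', ...
import numpy as np

def save_feats(feat_fpath, kpts, desc):
    np.savez(feat_fpath, kpts, desc)  # kpts -> 'arr_0', desc -> 'arr_1'

kpts = np.zeros((10, 5))                    # placeholder keypoints
desc = np.zeros((10, 128), dtype=np.uint8)  # placeholder descriptors
save_feats('cid1_FEAT.npz', kpts, desc)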
Example 2
 def assign_alternate(tblname):
     path = join(db_dir, tblname)
     if helpers.checkpath(path, verbose=True):
         return path
     path = join(db_dir, '.hs_internals', tblname)
     if helpers.checkpath(path, verbose=True):
         return path
     raise Exception('bad state=%r' % tblname)
Example 3
def precompute_flann(data, cache_dir=None, uid='', flann_params=None,
                     force_recompute=False):
    ''' Tries to load a cached flann index before doing anything'''
    print('[algos] precompute_flann(%r): ' % uid)
    cache_dir = '.' if cache_dir is None else cache_dir
    # Guard the None default before calling flann_params.values() below
    flann_params = {} if flann_params is None else flann_params
    # Generate a unique filename for data and flann parameters
    fparams_uid = helpers.remove_chars(str(flann_params.values()), ', \'[]')
    data_uid = helpers.hashstr_arr(data, 'dID')  # flann is dependent on the data
    flann_suffix = '_' + fparams_uid + '_' + data_uid + '.flann'
    # Append any user labels
    flann_fname = 'flann_index_' + uid + flann_suffix
    flann_fpath = os.path.normpath(join(cache_dir, flann_fname))
    # Load the index if it exists
    flann = pyflann.FLANN()
    load_success = False
    if helpers.checkpath(flann_fpath) and not force_recompute:
        try:
            #print('[algos] precompute_flann(): trying to load: %r' % flann_fname)
            flann.load_index(flann_fpath, data)
            print('[algos]...flann cache hit')
            load_success = True
        except Exception as ex:
            print('[algos] precompute_flann(): ...cannot load index')
            print('[algos] precompute_flann(): ...caught ex=\n%r' % (ex,))
    if not load_success:
        # Rebuild the index otherwise
        with helpers.Timer(msg='compute FLANN', newline=False):
            flann.build_index(data, **flann_params)
        print('[algos] precompute_flann(): save_index(%r)' % flann_fname)
        flann.save_index(flann_fpath)
    return flann
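A hedged usage sketch follows; the kdtree parameters are illustrative pyflann options, not necessarily the ones this project uses.

# Sketch (assumption): a typical call to precompute_flann with
# illustrative pyflann kdtree parameters.
import numpy as np

data = np.random.rand(1000, 128).astype(np.float32)  # descriptor matrix
flann_params = {'algorithm': 'kdtree', 'trees': 4}
flann = precompute_flann(data, cache_dir='./cache', uid='TEST',
                         flann_params=flann_params)
# The returned index answers nearest-neighbor queries:
query = np.random.rand(5, 128).astype(np.float32)
indexes, dists = flann.nn_index(query, num_neighbors=2)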
Example 4
def detect_checkpath(dir_):
    return helpers.checkpath(dir_, verbose=VERBOSE_DETERMINE_VERSION)
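Every example in this collection leans on the same helpers.checkpath contract. A minimal sketch of that contract, under the assumption that checkpath is an existence check with optional verbose printing (the real helper may also normalize or abbreviate paths):

# Sketch (assumption): the checkpath contract these examples rely on.
import os

def checkpath(path_, verbose=False):
    # True if the file or directory exists; optionally report it.
    exists_flag = os.path.exists(path_)
    if verbose:
        print('...checked %r, exists=%r' % (path_, exists_flag))
    return exists_flag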
Example 5
def convert_from_oxford_style(db_dir):
    # Get directories for the oxford groundtruth
    oxford_gt_dpath = join(db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dpath)
    # Check for corrupted files (Looking at you, Paris Buildings Dataset)
    corrupted_file_fpath = join(oxford_gt_dpath, 'corrupted_files.txt')
    corrupted_gname_set = set([])
    if helpers.checkpath(corrupted_file_fpath):
        with open(corrupted_file_fpath) as f:
            corrupted_gname_list = f.read().splitlines()
        corrupted_gname_set = set(corrupted_gname_list)

    # Recursively get relative path of all files in img_dpath
    print('Loading Oxford Style Images from: ' + db_dir)
    img_dpath = join(db_dir, 'images')
    helpers.assertpath(img_dpath)
    gname_list_ = [
        join(relpath(root, img_dpath), fname).replace('\\',
                                                      '/').replace('./', '')
        for (root, dlist, flist) in os.walk(img_dpath) for fname in iter(flist)
    ]
    gname_list = [
        gname for gname in iter(gname_list_)
        if gname not in corrupted_gname_set and helpers.matches_image(gname)
    ]
    print(' * num_images = %d ' % len(gname_list))

    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Chips')
    gt_fname_list = os.listdir(oxford_gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_chips = []
    gname2_chips_raw = collections.defaultdict(list)
    name_set = set([])
    print(' * num_gt_files = %d ' % num_gt_files)
    sys.stdout.write('parsed: 0000/%4d' % (num_gt_files))
    for gtx, gt_fname in enumerate(gt_fname_list):
        sys.stdout.write(('\b' * 9) + '%4d/%4d' % (gtx + 1, num_gt_files))
        if gtx % 10 - 1 == 0:
            sys.stdout.flush()
        if gt_fname == 'corrupted_files.txt':
            continue
        #Get name, quality, and num from fname
        (name, num, quality) = __oxgtfile2_oxsty_gttup(gt_fname)
        gt_fpath = join(oxford_gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_chip_info_sublist = __read_oxsty_gtfile(gt_fpath, name, quality,
                                                      img_dpath,
                                                      corrupted_gname_set)
        if quality == 'query':
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                query_chips.append((gname, roi, name, num))
        else:
            for (gname, roi) in iter(oxsty_chip_info_sublist):
                gname2_chips_raw[gname].append((name, roi, quality))
    sys.stdout.write('\n')
    print(' * num_query images = %d ' % len(query_chips))
    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    gname2_chips = collections.defaultdict(list)
    multinamed_gname_list = []
    for gname, val in gname2_chips_raw.iteritems():
        val_repr = map(repr, val)
        unique_reprs = set(val_repr)
        unique_indexes = [val_repr.index(urep) for urep in unique_reprs]
        for ux in unique_indexes:
            gname2_chips[gname].append(val[ux])
        if len(gname2_chips[gname]) > 1:
            multinamed_gname_list.append(gname)
    # print some statistics
    query_gname_list = [tup[0] for tup in query_chips]
    gname_with_groundtruth_list = gname2_chips.keys()
    gname_without_groundtruth_list = np.setdiff1d(gname_list,
                                                  gname_with_groundtruth_list)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth    = %d ' %
          len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' %
          len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' %
          len(multinamed_gname_list))
    #make sure all queries have ground truth and there are no duplicate queries
    assert len(query_gname_list) == len(
        np.intersect1d(query_gname_list, gname_with_groundtruth_list))
    assert len(query_gname_list) == len(set(query_gname_list))
    # build hotspotter tables
    print('adding to table: ')
    gx2_gname = gname_list
    nx2_name = ['____', '____'] + list(name_set)
    nx2_nid = [1, 1] + range(2, len(name_set) + 2)
    gx2_gid = range(1, len(gx2_gname) + 1)

    cx2_cid = []
    cx2_theta = []
    cx2_quality = []
    cx2_roi = []
    cx2_nx = []
    cx2_gx = []
    prop_dict = {'oxnum': []}

    def add_to_hs_tables(gname, name, roi, quality, num=''):
        cid = len(cx2_cid) + 1
        nx = nx2_name.index(name)
        if nx == 0:
            nx = 1
        gx = gx2_gname.index(gname)
        cx2_cid.append(cid)
        cx2_roi.append(roi)
        cx2_quality.append(quality)
        cx2_nx.append(nx)
        cx2_gx.append(gx)
        cx2_theta.append(0)
        prop_dict['oxnum'].append(num)
        sys.stdout.write(('\b' * 10) + 'cid = %4d' % cid)
        return cid

    for gname, roi, name, num in query_chips:
        add_to_hs_tables(gname, name, roi, 'query', num)
    for gname in gname2_chips.keys():
        if len(gname2_chips[gname]) == 1:
            (name, roi, quality) = gname2_chips[gname][0]
            add_to_hs_tables(gname, name, roi, quality)
        else:
            # just take the first name. This is foobar
            names, rois, qualities = zip(*gname2_chips[gname])
            add_to_hs_tables(gname, names[0], rois[0], qualities[0])
    for gname in gname_without_groundtruth_list:
        gpath = join(img_dpath, gname)
        try:
            (w, h) = Image.open(gpath).size
            roi = [0, 0, w, h]
            add_to_hs_tables(gname, '____', roi, 'unknown')
        except Exception as ex:
            print('Exception ex=%r' % ex)
            print('Not adding gname=%r' % gname)
            print('----')
    cx2_nid = np.array(nx2_nid)[cx2_nx]
    cx2_gid = np.array(gx2_gid)[cx2_gx]
    #
    # Write tables
    internal_dir = join(db_dir, ld2.RDIR_INTERNAL2)
    helpers.ensurepath(internal_dir)
    write_chip_table(internal_dir, cx2_cid, cx2_gid, cx2_nid, cx2_roi,
                     cx2_theta, prop_dict)
    write_name_table(internal_dir, nx2_nid, nx2_name)
    write_image_table(internal_dir, gx2_gid, gx2_gname)
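For context, a hedged sketch of what __oxgtfile2_oxsty_gttup (not shown above) presumably does: Oxford-style ground-truth files are named like all_souls_1_good.txt, encoding a landmark name, a ground-truth number, and a quality label.

# Sketch (assumption): parsing an Oxford-style gt filename such as
# 'all_souls_1_good.txt' into (name, num, quality). The real
# __oxgtfile2_oxsty_gttup may differ.
def oxgtfile2_gttup(gt_fname):
    parts = gt_fname.replace('.txt', '').split('_')
    quality = parts[-1]          # 'query', 'good', 'ok', or 'junk'
    num = parts[-2]              # ground-truth group number
    name = '_'.join(parts[:-2])  # landmark name, e.g. 'all_souls'
    return (name, num, quality)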
Example 6
def has_v1_gt(path):
    info_table = join(path, 'animal_info_table.csv')
    print(info_table)
    return helpers.checkpath(info_table, verbose=True)
Example 7
def has_ss_gt(path):
    ss_data = join(path, 'SightingData.csv')
    print(ss_data)
    return helpers.checkpath(ss_data, verbose=True)
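The has_* predicates above are presumably chained into a version guess; a hedged sketch of such a dispatcher, mirroring the if/elif order that load_csv_tables (Example 8) uses:

# Sketch (assumption): combining the ground-truth detectors into a
# database-version guess. The actual db_info logic may differ.
def guess_version(path):
    if has_v1_gt(path):
        return 'hotspotter-v1'
    if has_ss_gt(path):
        return 'stripespotter'
    return 'unknown'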
Example 8
def load_csv_tables(db_dir, allow_new_dir=True):
    '''
    Big function which loads the csv tables from a database directory.
    Returns HotspotterDirs and HotspotterTables.
    '''
    if 'vdd' in sys.argv:
        helpers.vd(db_dir)
    print('\n=============================')
    print('[ld2] Loading hotspotter csv tables: %r' % db_dir)
    print('=============================')
    hs_dirs = ds.HotspotterDirs(db_dir)
    hs_tables = ds.HotspotterTables()
    #exec(hs_dirs.execstr('hs_dirs'))
    #print(hs_dirs.execstr('hs_dirs'))
    img_dir      = hs_dirs.img_dir
    internal_dir = hs_dirs.internal_dir
    db_dir       = hs_dirs.db_dir
    # --- Table Names ---
    chip_table   = join(internal_dir, CHIP_TABLE_FNAME)
    name_table   = join(internal_dir, NAME_TABLE_FNAME)
    image_table  = join(internal_dir, IMAGE_TABLE_FNAME)  # TODO: Make optional
    # --- CHECKS ---
    has_dbdir   = helpers.checkpath(db_dir)
    has_imgdir  = helpers.checkpath(img_dir)
    has_chiptbl = helpers.checkpath(chip_table)
    has_nametbl = helpers.checkpath(name_table)
    has_imgtbl  = helpers.checkpath(image_table)

    # ChipTable Header Markers
    header_numdata = '# NumData '
    header_csvformat_re = '# *ChipID,'
    v12_csvformat_re = r'#[0-9]*\) '
    # Default ChipTable Header Variables
    chip_csv_format = ['ChipID', 'ImgID',  'NameID',   'roi[tl_x  tl_y  w  h]',  'theta']
    v12_csv_format = ['instance_id', 'image_id', 'name_id', 'roi']

    # TODO DETECT OLD FORMATS HERE
    db_version = 'current'
    isCurrentVersion = all([has_dbdir, has_imgdir, has_chiptbl, has_nametbl, has_imgtbl])
    print('[ld2] isCurrentVersion=%r' % isCurrentVersion)
    IS_VERSION_1_OR_2 = False

    if not isCurrentVersion:
        helpers.checkpath(db_dir, verbose=True)
        helpers.checkpath(img_dir, verbose=True)
        helpers.checkpath(chip_table, verbose=True)
        helpers.checkpath(name_table, verbose=True)
        helpers.checkpath(image_table, verbose=True)
        import db_info

        def assign_alternate(tblname):
            path = join(db_dir, tblname)
            if helpers.checkpath(path, verbose=True):
                return path
            path = join(db_dir, '.hs_internals', tblname)
            if helpers.checkpath(path, verbose=True):
                return path
            raise Exception('bad state=%r' % tblname)
        #
        if db_info.has_v2_gt(db_dir):
            IS_VERSION_1_OR_2 = True
            db_version = 'hotspotter-v2'
            chip_csv_format = []
            header_csvformat_re = v12_csvformat_re
            chip_table  = assign_alternate('instance_table.csv')
            name_table  = assign_alternate('name_table.csv')
            image_table = assign_alternate('image_table.csv')
        #
        elif db_info.has_v1_gt(db_dir):
            IS_VERSION_1_OR_2 = True
            db_version = 'hotspotter-v1'
            chip_csv_format = []
            header_csvformat_re = v12_csvformat_re
            chip_table  = assign_alternate('animal_info_table.csv')
            name_table  = assign_alternate('name_table.csv')
            image_table = assign_alternate('image_table.csv')
        #
        elif db_info.has_ss_gt(db_dir):
            db_version = 'stripespotter'
            chip_table = join(db_dir, 'SightingData.csv')

            chip_csv_format = ['imgindex', 'original_filepath', 'roi', 'animal_name']
            header_csvformat_re = '#imgindex,'
            #raise NotImplementedError('stripe spotter conversion')
            if not helpers.checkpath(chip_table, verbose=True):
                raise Exception('bad state chip_table=%r' % chip_table)
        else:
            try:
                db_version = 'current'  # Well almost
                chip_table  = assign_alternate(CHIP_TABLE_FNAME)
                name_table  = assign_alternate(NAME_TABLE_FNAME)
                image_table = assign_alternate(IMAGE_TABLE_FNAME)
            except Exception:
                if db_info.has_partial_gt(db_dir):
                    print('[ld2] detected incomplete database')
                    raise NotImplementedError('partial database recovery')
                elif allow_new_dir:
                    print('[ld2] detected new dir')
                    hs_dirs.ensure_dirs()
                    return hs_dirs, hs_tables, 'newdb'
                else:
                    import traceback
                    print(traceback.format_exc())
                    print('[ld2] I AM IN A BAD STATE!')
                    errmsg  = ''
                    errmsg += ('\n\n!!!!!\n\n')
                    errmsg += ('  ! The data tables seem to not be loaded.')
                    errmsg += (' Files in internal dir: %r\n' % internal_dir)
                    for fname in os.listdir(internal_dir):
                        errmsg += ('   ! %s\n' % fname)
                    errmsg += ('\n\n!!!!!\n\n')
                    print(errmsg)
                    raise Exception(errmsg)
    if not helpers.checkpath(chip_table):
        raise Exception('bad state chip_table=%r' % chip_table)
    print('[ld2] detected %r' % db_version)
    hs_dirs.ensure_dirs()
    print('-------------------------')
    print('[ld2] Loading database tables: ')
    cid_lines  = []
    line_num   = 0
    csv_line   = ''
    csv_fields = []

    # RCOS TODO: We need a more general csv read function
    # which can handle all of these little corner cases dealt with here.

    try:
        # ------------------
        # --- READ NAMES ---
        # ------------------
        print('[ld2] Loading name table: %r' % name_table)
        nx2_name = [UNKNOWN_NAME, UNKNOWN_NAME]
        nid2_nx  = {0: 0, 1: 1}
        name_lines = open(name_table, 'r')
        for line_num, csv_line in enumerate(name_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ') for _ in csv_line.strip('\n\r ').split(',')]
            nid = int(csv_fields[0])
            name = csv_fields[1]
            nid2_nx[nid] = len(nx2_name)
            nx2_name.append(name)
        name_lines.close()
        if VERBOSE_LOAD_DATA:
            print('[ld2] * Loaded %r names (excluding unknown names)' % (len(nx2_name) - 2))
            print('[ld2] * Done loading name table')
    except IOError as ex:
        print('IOError: %r' % ex)
        print('[ld2.name] loading without name table')
        #raise
    except Exception as ex:
        print('[ld2.name] ERROR %r' % ex)
        #print('[ld2.name] ERROR name_tbl parsing: %s' % (''.join(cid_lines)))
        print('[ld2.name] ERROR on line number:  %r' % (line_num))
        print('[ld2.name] ERROR on line:         %r' % (csv_line))
        print('[ld2.name] ERROR on fields:       %r' % (csv_fields))

    try:
        # -------------------
        # --- READ IMAGES ---
        # -------------------
        gx2_gname = []
        gx2_aif   = []
        gid2_gx = {}  # this is not used. It can probably be removed

        def add_image(gname, aif, gid):
            gx = len(gx2_gname)
            gx2_gname.append(gname)
            gx2_aif.append(aif)
            if gid is not None:
                # this is not used. It can probably be removed
                gid2_gx[gid] = gx

        print('[ld2] Loading images')
        # Load Image Table
        # <LEGACY CODE>
        if VERBOSE_LOAD_DATA:
            print('[ld2] * Loading image table: %r' % image_table)
        gid_lines = open(image_table, 'r').readlines()
        for line_num, csv_line in enumerate(gid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ') for _ in csv_line.strip('\n\r ').split(',')]
            gid = int(csv_fields[0])
            # You have 3 csv fields. Format == gid, gname.ext, aif
            if len(csv_fields) == 3:
                gname = csv_fields[1]
                aif   = csv_fields[2].lower() in ['true', '1']  # convert to bool correctly
            # You have 4 csv fields. Format == gid, gname, ext, aif
            if len(csv_fields) == 4:
                gname = '.'.join(csv_fields[1:3])
                aif   =  csv_fields[3].lower() in ['true', '1']
            add_image(gname, aif, gid)
        nTableImgs = len(gx2_gname)
        fromTableNames = set(gx2_gname)
        if VERBOSE_LOAD_DATA:
            print('[ld2] * table specified %r images' % nTableImgs)
            # </LEGACY CODE>
            # Load Image Directory
            print('[ld2] * Loading image directory: %r' % img_dir)
        nDirImgs = 0
        nDirImgsAlready = 0
        for fname in os.listdir(img_dir):
            # splitext correctly handles 4-character extensions like '.tiff'
            if os.path.splitext(fname)[1].lower() in ('.jpg', '.png', '.tiff'):
                if fname in fromTableNames:
                    nDirImgsAlready += 1
                    continue
                add_image(fname, False, None)
                nDirImgs += 1
        if VERBOSE_LOAD_DATA:
            print('[ld2] * dir specified %r images' % nDirImgs)
            print('[ld2] * %r were already specified in the table' % nDirImgsAlready)
            print('[ld2] * Loaded %r images' % len(gx2_gname))
            print('[ld2] * Done loading images')
    except IOError as ex:
        print('IOError: %r' % ex)
        print('[ld2.img] loading without image table')
        #raise
    except Exception as ex:
        print('[ld2!.img] ERROR %r' % ex)
        #print('[ld2.img] ERROR image_tbl parsing: %s' % (''.join(cid_lines)))
        print('[ld2!.img] ERROR on line number:  %r' % (line_num))
        print('[ld2!.img] ERROR on line:         %r' % (csv_line))
        print('[ld2!.img] ERROR on fields:       %r' % (csv_fields))
        raise

    try:
        # ------------------
        # --- READ CHIPS ---
        # ------------------
        print('[ld2] Loading chip table: %r' % chip_table)
        # Load Chip Table Header
        cid_lines = open(chip_table, 'r').readlines()
        num_data   = -1
        # Parse Chip Table Header
        for line_num, csv_line in enumerate(cid_lines):
            #print('[LINE %4d] %r' % (line_num, csv_line))
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0:
                #print('[LINE %4d] BROKEN' % (line_num))
                continue
            csv_line = csv_line.strip('\n')
            if csv_line.find('#') != 0:
                #print('[LINE %4d] BROKEN' % (line_num))
                break  # Break after header
            if re.search(header_csvformat_re, csv_line) is not None:
                #print('[LINE %4d] SEARCH' % (line_num))
                # Specified Header Variables
                if IS_VERSION_1_OR_2:
                    #print(csv_line)
                    end_ = csv_line.find('-')
                    if end_ != -1:
                        end_ = end_ - 1
                        #print('end_=%r' % end_)
                        fieldname = csv_line[5:end_]
                    else:
                        fieldname = csv_line[5:]
                    #print(fieldname)
                    chip_csv_format += [fieldname]

                else:
                    chip_csv_format = [_.strip() for _ in csv_line.strip('#').split(',')]
                #print('[ld2] read chip_csv_format: %r' % chip_csv_format)
            if csv_line.find(header_numdata) == 0:
                #print('[LINE %4d] NUM_DATA' % (line_num))
                num_data = int(csv_line.replace(header_numdata, ''))
        if IS_VERSION_1_OR_2 and len(chip_csv_format) == 0:
            chip_csv_format = v12_csv_format
        if VERBOSE_LOAD_DATA:
            print('[ld2] * num_chips: %r' % num_data)
            print('[ld2] * chip_csv_format: %r ' % chip_csv_format)
        #print('[ld2.chip] Header Columns: %s\n    ' % '\n   '.join(chip_csv_format))
        cid_x   = tryindex(chip_csv_format, 'ChipID', 'imgindex', 'instance_id')
        gid_x   = tryindex(chip_csv_format, 'ImgID', 'image_id')
        nid_x   = tryindex(chip_csv_format, 'NameID', 'name_id')
        roi_x   = tryindex(chip_csv_format, 'roi[tl_x  tl_y  w  h]', 'roi')
        theta_x = tryindex(chip_csv_format, 'theta')
        # new fields
        gname_x = tryindex(chip_csv_format, 'Image', 'original_filepath')
        name_x  = tryindex(chip_csv_format, 'Name', 'animal_name')
        required_x = [cid_x, gid_x, gname_x, nid_x, name_x, roi_x, theta_x]
        # Hotspotter Chip Tables
        cx2_cid   = []
        cx2_nx    = []
        cx2_gx    = []
        cx2_roi   = []
        cx2_theta = []
        # x is a csv field index in this context
        # get csv indexes which are unknown properties
        prop_x_list  = np.setdiff1d(range(len(chip_csv_format)), required_x).tolist()
        px2_prop_key = [chip_csv_format[x] for x in prop_x_list]
        prop_dict = {}
        for prop in iter(px2_prop_key):
            prop_dict[prop] = []
        if VERBOSE_LOAD_DATA:
            print('[ld2] * num_user_properties: %r' % (len(prop_dict.keys())))
        # Parse Chip Table
        for line_num, csv_line in enumerate(cid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ') for _ in csv_line.strip('\n\r ').split(',')]
            #
            # Load Chip ID
            try:
                cid = int(csv_fields[cid_x])
            except ValueError:
                print('[ld2!] cid_x = %r' % cid_x)
                print('[ld2!] csv_fields = %r' % csv_fields)
                print('[ld2!] csv_fields[cid_x] = %r' % csv_fields[cid_x])
                print(chip_csv_format)
                raise
            #
            # Load Chip ROI Info
            if roi_x != -1:
                roi_str = csv_fields[roi_x].strip('[').strip(']')
                roi = [int(round(float(_))) for _ in roi_str.split()]
            #
            # Load Chip theta Info
            if theta_x != -1:
                theta = float(csv_fields[theta_x])
            else:
                theta = 0
            #
            # Load Image ID/X
            if gid_x != -1:
                gid = int(csv_fields[gid_x])
                gx  = gid2_gx[gid]
            elif gname_x != -1:
                gname = csv_fields[gname_x]
                if db_version == 'stripespotter':
                    if not exists(gname):
                        gname = 'img-%07d.jpg' % cid
                        gpath = join(db_dir, 'images', gname)
                        w, h = Image.open(gpath).size
                        roi = [1, 1, w, h]
                try:
                    gx = gx2_gname.index(gname)
                except ValueError:
                    gx = len(gx2_gname)
                    gx2_gname.append(gname)
            #
            # Load Name ID/X
            if nid_x != -1:
                #print('namedbg csv_fields=%r' % csv_fields)
                #print('namedbg nid_x = %r' % nid_x)
                nid = int(csv_fields[nid_x])
                #print('namedbg %r' % nid)
                nx = nid2_nx[nid]
            elif name_x != -1:
                name = csv_fields[name_x]
                try:
                    nx = nx2_name.index(name)
                except ValueError:
                    nx = len(nx2_name)
                    nx2_name.append(name)
            # Append info to cid lists
            cx2_cid.append(cid)
            cx2_gx.append(gx)
            cx2_nx.append(nx)
            cx2_roi.append(roi)
            cx2_theta.append(theta)
            for px, x in enumerate(prop_x_list):
                prop = px2_prop_key[px]
                prop_val = csv_fields[x]
                prop_dict[prop].append(prop_val)
    except Exception as ex:
        print('[chip.ld2] ERROR %r' % ex)
        #print('[chip.ld2] ERROR parsing: %s' % (''.join(cid_lines)))
        print('[chip.ld2] ERROR on line number:  %r' % (line_num))
        print('[chip.ld2] ERROR on line:         %r' % (csv_line))
        print('[chip.ld2] ERROR on fields:       %r' % (csv_fields))
        raise

    if VERBOSE_LOAD_DATA:
        print('[ld2] * Loaded: %r chips' % (len(cx2_cid)))
        print('[ld2] * Done loading chip table')

    # Return all information from load_tables
    #hs_tables.gid2_gx = gid2_gx
    #hs_tables.nid2_nx  = nid2_nx
    hs_tables.init(gx2_gname, gx2_aif,
                   nx2_name,
                   cx2_cid, cx2_nx, cx2_gx,
                   cx2_roi, cx2_theta, prop_dict)

    print('[ld2] Done Loading hotspotter csv tables: %r' % (db_dir))
    if 'vcd' in sys.argv:
        helpers.vd(hs_dirs.computed_dir)
    return hs_dirs, hs_tables, db_version
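The 'RCOS TODO' comment above asks for a more general csv read function; a hedged sketch of one that factors out the parsing pattern repeated throughout load_csv_tables (hash-prefixed headers, blank lines, stripped fields):

# Sketch (assumption): a generic reader for these hash-commented csv
# tables; it separates header comments from stripped data rows.
def read_hs_csv(fpath):
    header_lines = []
    data_rows = []
    with open(fpath, 'r') as file_:
        for csv_line in file_:
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0:
                continue
            if csv_line.startswith('#'):
                header_lines.append(csv_line)
                continue
            data_rows.append([_.strip(' ') for _ in csv_line.split(',')])
    return header_lines, data_rows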
Example 9
def load_chips(hs, cx_list=None, **kwargs):
    print('\n=============================')
    print('[cc2] Precomputing chips and loading chip paths: %r' % hs.get_db_name())
    print('=============================')
    #----------------
    # COMPUTE SETUP
    #----------------
    chip_cfg = hs.prefs.chip_cfg
    chip_uid = chip_cfg.get_uid()
    if hs.cpaths.chip_uid != '' and hs.cpaths.chip_uid != chip_uid:
        print('[cc2] Disagreement: OLD_chip_uid = %r' % hs.cpaths.chip_uid)
        print('[cc2] Disagreement: NEW_chip_uid = %r' % chip_uid)
        print('[cc2] Unloading all chip information')
        hs.unload_all()
    print('[cc2] chip_uid = %r' % chip_uid)
    # Get the list of chips paths to load
    cx_list = hs.get_valid_cxs() if cx_list is None else cx_list
    if not np.iterable(cx_list):
        cx_list = [cx_list]
    if len(cx_list) == 0:
        return  # HACK
    cx_list = np.array(cx_list)  # HACK
    hs.cpaths.chip_uid = chip_uid
    #print('[cc2] Requested %d chips' % (len(cx_list)))
    #print('[cc2] cx_list = %r' % (cx_list,))
    # Get table information
    try:
        gx_list    = hs.tables.cx2_gx[cx_list]
        cid_list   = hs.tables.cx2_cid[cx_list]
        theta_list = hs.tables.cx2_theta[cx_list]
        roi_list   = hs.tables.cx2_roi[cx_list]
        #gname_list = hs.tables.gx2_gname[gx_list]
    except IndexError as ex:
        print(repr(ex))
        print(hs.tables)
        print('cx_list=%r' % (cx_list,))
        raise
    # Get ChipConfig Parameters
    sqrt_area   = chip_cfg['chip_sqrt_area']

    filter_list = []
    if chip_cfg['adapteq']:
        filter_list.append(adapteq_fn)
    if chip_cfg['histeq']:
        filter_list.append(histeq_fn)
    if chip_cfg['region_norm']:
        filter_list.append(region_norm_fn)
    #if chip_cfg['maxcontrast']:
        #filter_list.append(maxcontr_fn)
    #if chip_cfg['rank_eq']:
        #filter_list.append(rankeq_fn)
    #if chip_cfg['local_eq']:
        #filter_list.append(localeq_fn)
    if chip_cfg['grabcut']:
        filter_list.append(grabcut_fn)

    #---------------------------
    # ___Normalized Chip Args___
    #---------------------------
    # Full Image Paths: where to extract the chips from
    gfpath_list = hs.gx2_gname(gx_list, full=True)
    #img_dir = hs.dirs.img_dir
    #gfpath_list = [join(img_dir, gname) for gname in iter(gname_list)]
    # Chip Paths: where to write extracted chips to
    _cfname_fmt = 'cid%d' + chip_uid + '.png'
    _cfpath_fmt = join(hs.dirs.chip_dir, _cfname_fmt)
    cfpath_list = [_cfpath_fmt  % cid for cid in iter(cid_list)]
    # Normalized Chip Sizes: ensure chips have about sqrt_area squared pixels
    chipsz_list = compute_uniform_area_chip_sizes(roi_list, sqrt_area)

    #--------------------------
    # EXTRACT AND RESIZE CHIPS
    #--------------------------
    pcc_kwargs = {
        'arg_list': [gfpath_list, cfpath_list, roi_list, theta_list, chipsz_list],
        'lazy': not hs.args.nocache_chips,
        'num_procs': hs.args.num_procs,
        'common_args': [filter_list]
    }
    # Compute all chips with parameterized filters
    parallel_compute(compute_chip, **pcc_kwargs)

    # Read sizes
    try:
        rsize_list = [(None, None) if path is None else Image.open(path).size
                      for path in iter(cfpath_list)]
    except IOError as ex:
        import gc
        gc.collect()
        print('[cc] ex=%r' % ex)
        print('path=%r' % path)
        if helpers.checkpath(path, verbose=True):
            import time
            time.sleep(1)  # delay for 1 second
            print('[cc] file exists but caused an IOError?')
            print('[cc] probably corrupted. Removing it')
            try:
                helpers.remove_file(path)
            except OSError:
                print('Something bad happened')
                raise
        raise
    #----------------------
    # UPDATE API VARIABLES
    #----------------------
    print('[cc2] Done Precomputing chips and loading chip paths')

    # Extend the datastructure if needed
    list_size = max(cx_list) + 1
    #helpers.ensure_list_size(hs.cpaths.cx2_chip_path, list_size)
    helpers.ensure_list_size(hs.cpaths.cx2_rchip_path, list_size)
    helpers.ensure_list_size(hs.cpaths.cx2_rchip_size, list_size)
    # Copy the values into the ChipPaths object
    #for lx, cx in enumerate(cx_list):
        #hs.cpaths.cx2_chip_path[cx] = cfpath_list[lx]
    for lx, cx in enumerate(cx_list):
        hs.cpaths.cx2_rchip_path[cx] = cfpath_list[lx]
    for lx, cx in enumerate(cx_list):
        hs.cpaths.cx2_rchip_size[cx] = rsize_list[lx]
    #hs.load_cx2_rchip_size()  # TODO: Loading rchip size should be handled more robustly
    print('[cc2]=============================')
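For reference, a hedged sketch of the uniform-area sizing that compute_uniform_area_chip_sizes (not shown) performs, per the comment above: scale each ROI so the resulting chip has roughly sqrt_area ** 2 pixels.

# Sketch (assumption): uniform-area chip sizing. ROIs are
# (tl_x, tl_y, w, h) tuples; only w and h matter here.
import numpy as np

def uniform_area_chip_sizes(roi_list, sqrt_area=None):
    chipsz_list = []
    for (_x, _y, w, h) in roi_list:
        if sqrt_area is not None and sqrt_area > 0:
            # scale factor s so that (s * w) * (s * h) == sqrt_area ** 2
            scale = np.sqrt((sqrt_area ** 2) / float(w * h))
            w, h = scale * w, scale * h
        chipsz_list.append((int(round(w)), int(round(h))))
    return chipsz_list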