Example #1
0
def preload_args_process(args):
    from hscom import helpers
    import sys
    # Process relevant args
    cids = args.query
    if args.vdd:
        helpers.vd(args.dbdir)
        args.vdd = False
    load_all = args.autoquery or len(cids) > 0
    if helpers.inIPython() or '--cmd' in sys.argv:
        args.nosteal = True
    return load_all, cids
Example #2
0
def load_csv_tables(db_dir, allow_new_dir=True):
    '''
    Big function which loads the csv tables from a datatabase directory
    Returns HotspotterDirs and HotspotterTables
    '''
    if 'vdd' in sys.argv:
        helpers.vd(db_dir)
    print('\n=============================')
    print('[ld2] Loading hotspotter csv tables: %r' % db_dir)
    print('=============================')
    hs_dirs = ds.HotspotterDirs(db_dir)
    hs_tables = ds.HotspotterTables()
    #exec(hs_dirs.execstr('hs_dirs'))
    #print(hs_dirs.execstr('hs_dirs'))
    img_dir      = hs_dirs.img_dir
    internal_dir = hs_dirs.internal_dir
    db_dir       = hs_dirs.db_dir
    # --- Table Names ---
    chip_table   = join(internal_dir, CHIP_TABLE_FNAME)
    name_table   = join(internal_dir, NAME_TABLE_FNAME)
    image_table  = join(internal_dir, IMAGE_TABLE_FNAME)  # TODO: Make optional
    # --- CHECKS ---
    has_dbdir   = helpers.checkpath(db_dir)
    has_imgdir  = helpers.checkpath(img_dir)
    has_chiptbl = helpers.checkpath(chip_table)
    has_nametbl = helpers.checkpath(name_table)
    has_imgtbl  = helpers.checkpath(image_table)

    # ChipTable Header Markers
    header_numdata = '# NumData '
    header_csvformat_re = '# *ChipID,'
    v12_csvformat_re = r'#[0-9]*\) '
    # Default ChipTable Header Variables
    chip_csv_format = ['ChipID', 'ImgID',  'NameID',   'roi[tl_x  tl_y  w  h]',  'theta']
    v12_csv_format = ['instance_id', 'image_id', 'name_id', 'roi']

    # TODO DETECT OLD FORMATS HERE
    db_version = 'current'
    isCurrentVersion = all([has_dbdir, has_imgdir, has_chiptbl, has_nametbl, has_imgtbl])
    print('[ld2] isCurrentVersion=%r' % isCurrentVersion)
    IS_VERSION_1_OR_2 = False

    if not isCurrentVersion:
        helpers.checkpath(db_dir, verbose=True)
        helpers.checkpath(img_dir, verbose=True)
        helpers.checkpath(chip_table, verbose=True)
        helpers.checkpath(name_table, verbose=True)
        helpers.checkpath(image_table, verbose=True)
        import db_info

        def assign_alternate(tblname):
            path = join(db_dir, tblname)
            if helpers.checkpath(path, verbose=True):
                return path
            path = join(db_dir, '.hs_internals', tblname)
            if helpers.checkpath(path, verbose=True):
                return path
            raise Exception('bad state=%r' % tblname)
        #
        if db_info.has_v2_gt(db_dir):
            IS_VERSION_1_OR_2 = True
            db_version = 'hotspotter-v2'
            chip_csv_format = []
            header_csvformat_re = v12_csvformat_re
            chip_table  = assign_alternate('instance_table.csv')
            name_table  = assign_alternate('name_table.csv')
            image_table = assign_alternate('image_table.csv')
        #
        elif db_info.has_v1_gt(db_dir):
            IS_VERSION_1_OR_2 = True
            db_version = 'hotspotter-v1'
            chip_csv_format = []
            header_csvformat_re = v12_csvformat_re
            chip_table  = assign_alternate('animal_info_table.csv')
            name_table  = assign_alternate('name_table.csv')
            image_table = assign_alternate('image_table.csv')
        #
        elif db_info.has_ss_gt(db_dir):
            db_version = 'stripespotter'
            chip_table = join(db_dir, 'SightingData.csv')

            chip_csv_format = ['imgindex', 'original_filepath', 'roi', 'animal_name']
            header_csvformat_re = '#imgindex,'
            #raise NotImplementedError('stripe spotter conversion')
            if not helpers.checkpath(chip_table, verbose=True):
                raise Exception('bad state chip_table=%r' % chip_table)
        else:
            try:
                db_version = 'current'  # Well almost
                chip_table  = assign_alternate(CHIP_TABLE_FNAME)
                name_table  = assign_alternate(NAME_TABLE_FNAME)
                image_table = assign_alternate(IMAGE_TABLE_FNAME)
            except Exception:
                if db_info.has_partial_gt(db_dir):
                    print('[ld2] detected incomplete database')
                    raise NotImplementedError('partial database recovery')
                elif allow_new_dir:
                    print('[ld2] detected new dir')
                    hs_dirs.ensure_dirs()
                    return hs_dirs, hs_tables, 'newdb'
                else:
                    import traceback
                    print(traceback.format_exc())
                    print('[ld2] I AM IN A BAD STATE!')
                    errmsg  = ''
                    errmsg += ('\n\n!!!!!\n\n')
                    errmsg += ('  ! The data tables seem to not be loaded')
                    errmsg += (' Files in internal dir: %r' % internal_dir)
                    for fname in os.listdir(internal_dir):
                        errmsg += ('   ! fname')
                    errmsg += ('\n\n!!!!!\n\n')
                    print(errmsg)
                    raise Exception(errmsg)
    if not helpers.checkpath(chip_table):
        raise Exception('bad state chip_table=%r' % chip_table)
    print('[ld2] detected %r' % db_version)
    hs_dirs.ensure_dirs()
    print('-------------------------')
    print('[ld2] Loading database tables: ')
    cid_lines  = []
    line_num   = 0
    csv_line   = ''
    csv_fields = []

    # RCOS TODO: We need a more general csv read function
    # which can handle all of these little corner cases delt with here.

    try:
        # ------------------
        # --- READ NAMES ---
        # ------------------
        print('[ld2] Loading name table: %r' % name_table)
        nx2_name = [UNKNOWN_NAME, UNKNOWN_NAME]
        nid2_nx  = {0: 0, 1: 1}
        name_lines = open(name_table, 'r')
        for line_num, csv_line in enumerate(name_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ') for _ in csv_line.strip('\n\r ').split(',')]
            nid = int(csv_fields[0])
            name = csv_fields[1]
            nid2_nx[nid] = len(nx2_name)
            nx2_name.append(name)
        name_lines.close()
        if VERBOSE_LOAD_DATA:
            print('[ld2] * Loaded %r names (excluding unknown names)' % (len(nx2_name) - 2))
            print('[ld2] * Done loading name table')
    except IOError as ex:
        print('IOError: %r' % ex)
        print('[ld2.name] loading without name table')
        #raise
    except Exception as ex:
        print('[ld2.name] ERROR %r' % ex)
        #print('[ld2.name] ERROR name_tbl parsing: %s' % (''.join(cid_lines)))
        print('[ld2.name] ERROR on line number:  %r' % (line_num))
        print('[ld2.name] ERROR on line:         %r' % (csv_line))
        print('[ld2.name] ERROR on fields:       %r' % (csv_fields))

    try:
        # -------------------
        # --- READ IMAGES ---
        # -------------------
        gx2_gname = []
        gx2_aif   = []
        gid2_gx = {}  # this is not used. It can probably be removed

        def add_image(gname, aif, gid):
            gx = len(gx2_gname)
            gx2_gname.append(gname)
            gx2_aif.append(aif)
            if gid is not None:
                # this is not used. It can probably be removed
                gid2_gx[gid] = gx

        print('[ld2] Loading images')
        # Load Image Table
        # <LEGACY CODE>
        if VERBOSE_LOAD_DATA:
            print('[ld2] * Loading image table: %r' % image_table)
        gid_lines = open(image_table, 'r').readlines()
        for line_num, csv_line in enumerate(gid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ') for _ in csv_line.strip('\n\r ').split(',')]
            gid = int(csv_fields[0])
            # You have 3 csv files. Format == gid, gname.ext, aif
            if len(csv_fields) == 3:
                gname = csv_fields[1]
                aif   = csv_fields[2].lower() in ['true', '1']  # convert to bool correctly
            # You have 4 csv fields. Format == gid, gname, ext, aif
            if len(csv_fields) == 4:
                gname = '.'.join(csv_fields[1:3])
                aif   =  csv_fields[3].lower() in ['true', '1']
            add_image(gname, aif, gid)
        nTableImgs = len(gx2_gname)
        fromTableNames = set(gx2_gname)
        if VERBOSE_LOAD_DATA:
            print('[ld2] * table specified %r images' % nTableImgs)
            # </LEGACY CODE>
            # Load Image Directory
            print('[ld2] * Loading image directory: %r' % img_dir)
        nDirImgs = 0
        nDirImgsAlready = 0
        for fname in os.listdir(img_dir):
            if len(fname) > 4 and fname[-4:].lower() in ['.jpg', '.png', '.tiff']:
                if fname in fromTableNames:
                    nDirImgsAlready += 1
                    continue
                add_image(fname, False, None)
                nDirImgs += 1
        if VERBOSE_LOAD_DATA:
            print('[ld2] * dir specified %r images' % nDirImgs)
            print('[ld2] * %r were already specified in the table' % nDirImgsAlready)
            print('[ld2] * Loaded %r images' % len(gx2_gname))
            print('[ld2] * Done loading images')
    except IOError:
        print('IOError: %r' % ex)
        print('[ld2.img] loading without image table')
        #raise
    except Exception as ex:
        print('[ld2!.img] ERROR %r' % ex)
        #print('[ld2.img] ERROR image_tbl parsing: %s' % (''.join(cid_lines)))
        print('[ld2!.img] ERROR on line number:  %r' % (line_num))
        print('[ld2!.img] ERROR on line:         %r' % (csv_line))
        print('[ld2!.img] ERROR on fields:       %r' % (csv_fields))
        raise

    try:
        # ------------------
        # --- READ CHIPS ---
        # ------------------
        print('[ld2] Loading chip table: %r' % chip_table)
        # Load Chip Table Header
        cid_lines = open(chip_table, 'r').readlines()
        num_data   = -1
        # Parse Chip Table Header
        for line_num, csv_line in enumerate(cid_lines):
            #print('[LINE %4d] %r' % (line_num, csv_line))
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0:
                #print('[LINE %4d] BROKEN' % (line_num))
                continue
            csv_line = csv_line.strip('\n')
            if csv_line.find('#') != 0:
                #print('[LINE %4d] BROKEN' % (line_num))
                break  # Break after header
            if re.search(header_csvformat_re, csv_line) is not None:
                #print('[LINE %4d] SEARCH' % (line_num))
                # Specified Header Variables
                if IS_VERSION_1_OR_2:
                    #print(csv_line)
                    end_ = csv_line.find('-')
                    if end_ != -1:
                        end_ = end_ - 1
                        #print('end_=%r' % end_)
                        fieldname = csv_line[5:end_]
                    else:
                        fieldname = csv_line[5:]
                    #print(fieldname)
                    chip_csv_format += [fieldname]

                else:
                    chip_csv_format = [_.strip() for _ in csv_line.strip('#').split(',')]
                #print('[ld2] read chip_csv_format: %r' % chip_csv_format)
            if csv_line.find(header_numdata) == 0:
                #print('[LINE %4d] NUM_DATA' % (line_num))
                num_data = int(csv_line.replace(header_numdata, ''))
        if IS_VERSION_1_OR_2 and len(chip_csv_format) == 0:
            chip_csv_format = v12_csv_format
        if VERBOSE_LOAD_DATA:
            print('[ld2] * num_chips: %r' % num_data)
            print('[ld2] * chip_csv_format: %r ' % chip_csv_format)
        #print('[ld2.chip] Header Columns: %s\n    ' % '\n   '.join(chip_csv_format))
        cid_x   = tryindex(chip_csv_format, 'ChipID', 'imgindex', 'instance_id')
        gid_x   = tryindex(chip_csv_format, 'ImgID', 'image_id')
        nid_x   = tryindex(chip_csv_format, 'NameID', 'name_id')
        roi_x   = tryindex(chip_csv_format, 'roi[tl_x  tl_y  w  h]', 'roi')
        theta_x = tryindex(chip_csv_format, 'theta')
        # new fields
        gname_x = tryindex(chip_csv_format, 'Image', 'original_filepath')
        name_x  = tryindex(chip_csv_format, 'Name', 'animal_name')
        required_x = [cid_x, gid_x, gname_x, nid_x, name_x, roi_x, theta_x]
        # Hotspotter Chip Tables
        cx2_cid   = []
        cx2_nx    = []
        cx2_gx    = []
        cx2_roi   = []
        cx2_theta = []
        # x is a csv field index in this context
        # get csv indexes which are unknown properties
        prop_x_list  = np.setdiff1d(range(len(chip_csv_format)), required_x).tolist()
        px2_prop_key = [chip_csv_format[x] for x in prop_x_list]
        prop_dict = {}
        for prop in iter(px2_prop_key):
            prop_dict[prop] = []
        if VERBOSE_LOAD_DATA:
            print('[ld2] * num_user_properties: %r' % (len(prop_dict.keys())))
        # Parse Chip Table
        for line_num, csv_line in enumerate(cid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [_.strip(' ') for _ in csv_line.strip('\n\r ').split(',')]
            #
            # Load Chip ID
            try:
                cid = int(csv_fields[cid_x])
            except ValueError:
                print('[ld2!] cid_x = %r' % cid_x)
                print('[ld2!] csv_fields = %r' % csv_fields)
                print('[ld2!] csv_fields[cid_x] = %r' % csv_fields[cid_x])
                print(chip_csv_format)
                raise
            #
            # Load Chip ROI Info
            if roi_x != -1:
                roi_str = csv_fields[roi_x].strip('[').strip(']')
                roi = [int(round(float(_))) for _ in roi_str.split()]
            #
            # Load Chip theta Info
            if theta_x != -1:
                theta = float(csv_fields[theta_x])
            else:
                theta = 0
            #
            # Load Image ID/X
            if gid_x != -1:
                gid = int(csv_fields[gid_x])
                gx  = gid2_gx[gid]
            elif gname_x != -1:
                gname = csv_fields[gname_x]
                if db_version == 'stripespotter':
                    if not exists(gname):
                        gname = 'img-%07d.jpg' % cid
                        gpath = join(db_dir, 'images', gname)
                        w, h = Image.open(gpath).size
                        roi = [1, 1, w, h]
                try:
                    gx = gx2_gname.index(gname)
                except ValueError:
                    gx = len(gx2_gname)
                    gx2_gname.append(gname)
            #
            # Load Name ID/X
            if nid_x != -1:
                #print('namedbg csv_fields=%r' % csv_fields)
                #print('namedbg nid_x = %r' % nid_x)
                nid = int(csv_fields[nid_x])
                #print('namedbg %r' % nid)
                nx = nid2_nx[nid]
            elif name_x != -1:
                name = csv_fields[name_x]
                try:
                    nx = nx2_name.index(name)
                except ValueError:
                    nx = len(nx2_name)
                    nx2_name.append(name)
            # Append info to cid lists
            cx2_cid.append(cid)
            cx2_gx.append(gx)
            cx2_nx.append(nx)
            cx2_roi.append(roi)
            cx2_theta.append(theta)
            for px, x in enumerate(prop_x_list):
                prop = px2_prop_key[px]
                prop_val = csv_fields[x]
                prop_dict[prop].append(prop_val)
    except Exception as ex:
        print('[chip.ld2] ERROR %r' % ex)
        #print('[chip.ld2] ERROR parsing: %s' % (''.join(cid_lines)))
        print('[chip.ld2] ERROR reading header:  %r' % (line_num))
        print('[chip.ld2] ERROR on line number:  %r' % (line_num))
        print('[chip.ld2] ERROR on line:         %r' % (csv_line))
        print('[chip.ld2] ERROR on fields:       %r' % (csv_fields))
        raise

    if VERBOSE_LOAD_DATA:
        print('[ld2] * Loaded: %r chips' % (len(cx2_cid)))
        print('[ld2] * Done loading chip table')

    # Return all information from load_tables
    #hs_tables.gid2_gx = gid2_gx
    #hs_tables.nid2_nx  = nid2_nx
    hs_tables.init(gx2_gname, gx2_aif,
                   nx2_name,
                   cx2_cid, cx2_nx, cx2_gx,
                   cx2_roi, cx2_theta, prop_dict)

    print('[ld2] Done Loading hotspotter csv tables: %r' % (db_dir))
    if 'vcd' in sys.argv:
        helpers.vd(hs_dirs.computed_dir)
    return hs_dirs, hs_tables, db_version
Example #3
0
 def vrd(hs):
     result_dir = os.path.normpath(hs.dirs.result_dir)
     print('[hs] viewing result_dir: %r ' % result_dir)
     helpers.vd(result_dir)
Example #4
0
def vdg(hs):
    output_dir = get_training_output_dir(hs)
    helpers.vd(output_dir)
Example #5
0
 def vcd(hs):
     computed_dir = os.path.normpath(hs.dirs.computed_dir)
     print('[hs] viewing computed_dir: %r ' % computed_dir)
     helpers.vd(computed_dir)
Example #6
0
 def vgd(hs):
     global_dir = io.GLOBAL_CACHE_DIR
     print('[hs] viewing global_dir: %r ' % global_dir)
     helpers.vd(global_dir)
Example #7
0
 def vrd(hs):
     result_dir = os.path.normpath(hs.dirs.result_dir)
     print('[hs] viewing result_dir: %r ' % result_dir)
     helpers.vd(result_dir)
Example #8
0
 def vdd(hs):
     db_dir = os.path.normpath(hs.dirs.db_dir)
     print('[hs] viewing db_dir: %r ' % db_dir)
     helpers.vd(db_dir)
Example #9
0
 def vgd(hs):
     global_dir = io.GLOBAL_CACHE_DIR
     print('[hs] viewing global_dir: %r ' % global_dir)
     helpers.vd(global_dir)
Example #10
0
 def vcd(hs):
     computed_dir = os.path.normpath(hs.dirs.computed_dir)
     print('[hs] viewing computed_dir: %r ' % computed_dir)
     helpers.vd(computed_dir)
Example #11
0
 def vdd(hs):
     db_dir = os.path.normpath(hs.dirs.db_dir)
     print('[hs] viewing db_dir: %r ' % db_dir)
     helpers.vd(db_dir)
Example #12
0
def load_csv_tables(db_dir, allow_new_dir=True):
    '''
    Big function which loads the csv tables from a datatabase directory
    Returns HotspotterDirs and HotspotterTables
    '''
    if 'vdd' in sys.argv:
        helpers.vd(db_dir)
    print('\n=============================')
    print('[ld2] Loading hotspotter csv tables: %r' % db_dir)
    print('=============================')
    hs_dirs = ds.HotspotterDirs(db_dir)
    hs_tables = ds.HotspotterTables()
    #exec(hs_dirs.execstr('hs_dirs'))
    #print(hs_dirs.execstr('hs_dirs'))
    img_dir = hs_dirs.img_dir
    internal_dir = hs_dirs.internal_dir
    db_dir = hs_dirs.db_dir
    # --- Table Names ---
    chip_table = join(internal_dir, CHIP_TABLE_FNAME)
    name_table = join(internal_dir, NAME_TABLE_FNAME)
    image_table = join(internal_dir, IMAGE_TABLE_FNAME)  # TODO: Make optional
    # --- CHECKS ---
    has_dbdir = helpers.checkpath(db_dir)
    has_imgdir = helpers.checkpath(img_dir)
    has_chiptbl = helpers.checkpath(chip_table)
    has_nametbl = helpers.checkpath(name_table)
    has_imgtbl = helpers.checkpath(image_table)

    # ChipTable Header Markers
    header_numdata = '# NumData '
    header_csvformat_re = '# *ChipID,'
    v12_csvformat_re = r'#[0-9]*\) '
    # Default ChipTable Header Variables
    chip_csv_format = [
        'ChipID', 'ImgID', 'NameID', 'roi[tl_x  tl_y  w  h]', 'theta'
    ]
    v12_csv_format = ['instance_id', 'image_id', 'name_id', 'roi']

    # TODO DETECT OLD FORMATS HERE
    db_version = 'current'
    isCurrentVersion = all(
        [has_dbdir, has_imgdir, has_chiptbl, has_nametbl, has_imgtbl])
    print('[ld2] isCurrentVersion=%r' % isCurrentVersion)
    IS_VERSION_1_OR_2 = False

    if not isCurrentVersion:
        helpers.checkpath(db_dir, verbose=True)
        helpers.checkpath(img_dir, verbose=True)
        helpers.checkpath(chip_table, verbose=True)
        helpers.checkpath(name_table, verbose=True)
        helpers.checkpath(image_table, verbose=True)
        import db_info

        def assign_alternate(tblname):
            path = join(db_dir, tblname)
            if helpers.checkpath(path, verbose=True):
                return path
            path = join(db_dir, '.hs_internals', tblname)
            if helpers.checkpath(path, verbose=True):
                return path
            raise Exception('bad state=%r' % tblname)

        #
        if db_info.has_v2_gt(db_dir):
            IS_VERSION_1_OR_2 = True
            db_version = 'hotspotter-v2'
            chip_csv_format = []
            header_csvformat_re = v12_csvformat_re
            chip_table = assign_alternate('instance_table.csv')
            name_table = assign_alternate('name_table.csv')
            image_table = assign_alternate('image_table.csv')
        #
        elif db_info.has_v1_gt(db_dir):
            IS_VERSION_1_OR_2 = True
            db_version = 'hotspotter-v1'
            chip_csv_format = []
            header_csvformat_re = v12_csvformat_re
            chip_table = assign_alternate('animal_info_table.csv')
            name_table = assign_alternate('name_table.csv')
            image_table = assign_alternate('image_table.csv')
        #
        elif db_info.has_ss_gt(db_dir):
            db_version = 'stripespotter'
            chip_table = join(db_dir, 'SightingData.csv')

            chip_csv_format = [
                'imgindex', 'original_filepath', 'roi', 'animal_name'
            ]
            header_csvformat_re = '#imgindex,'
            #raise NotImplementedError('stripe spotter conversion')
            if not helpers.checkpath(chip_table, verbose=True):
                raise Exception('bad state chip_table=%r' % chip_table)
        else:
            try:
                db_version = 'current'  # Well almost
                chip_table = assign_alternate(CHIP_TABLE_FNAME)
                name_table = assign_alternate(NAME_TABLE_FNAME)
                image_table = assign_alternate(IMAGE_TABLE_FNAME)
            except Exception:
                if db_info.has_partial_gt(db_dir):
                    print('[ld2] detected incomplete database')
                    raise NotImplementedError('partial database recovery')
                elif allow_new_dir:
                    print('[ld2] detected new dir')
                    hs_dirs.ensure_dirs()
                    return hs_dirs, hs_tables, 'newdb'
                else:
                    import traceback
                    print(traceback.format_exc())
                    print('[ld2] I AM IN A BAD STATE!')
                    errmsg = ''
                    errmsg += ('\n\n!!!!!\n\n')
                    errmsg += ('  ! The data tables seem to not be loaded')
                    errmsg += (' Files in internal dir: %r' % internal_dir)
                    for fname in os.listdir(internal_dir):
                        errmsg += ('   ! fname')
                    errmsg += ('\n\n!!!!!\n\n')
                    print(errmsg)
                    raise Exception(errmsg)
    if not helpers.checkpath(chip_table):
        raise Exception('bad state chip_table=%r' % chip_table)
    print('[ld2] detected %r' % db_version)
    hs_dirs.ensure_dirs()
    print('-------------------------')
    print('[ld2] Loading database tables: ')
    cid_lines = []
    line_num = 0
    csv_line = ''
    csv_fields = []

    # RCOS TODO: We need a more general csv read function
    # which can handle all of these little corner cases delt with here.

    try:
        # ------------------
        # --- READ NAMES ---
        # ------------------
        print('[ld2] Loading name table: %r' % name_table)
        nx2_name = [UNKNOWN_NAME, UNKNOWN_NAME]
        nid2_nx = {0: 0, 1: 1}
        name_lines = open(name_table, 'r')
        for line_num, csv_line in enumerate(name_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [
                _.strip(' ') for _ in csv_line.strip('\n\r ').split(',')
            ]
            nid = int(csv_fields[0])
            name = csv_fields[1]
            nid2_nx[nid] = len(nx2_name)
            nx2_name.append(name)
        name_lines.close()
        if VERBOSE_LOAD_DATA:
            print('[ld2] * Loaded %r names (excluding unknown names)' %
                  (len(nx2_name) - 2))
            print('[ld2] * Done loading name table')
    except IOError as ex:
        print('IOError: %r' % ex)
        print('[ld2.name] loading without name table')
        #raise
    except Exception as ex:
        print('[ld2.name] ERROR %r' % ex)
        #print('[ld2.name] ERROR name_tbl parsing: %s' % (''.join(cid_lines)))
        print('[ld2.name] ERROR on line number:  %r' % (line_num))
        print('[ld2.name] ERROR on line:         %r' % (csv_line))
        print('[ld2.name] ERROR on fields:       %r' % (csv_fields))

    try:
        # -------------------
        # --- READ IMAGES ---
        # -------------------
        gx2_gname = []
        gx2_aif = []
        gid2_gx = {}  # this is not used. It can probably be removed

        def add_image(gname, aif, gid):
            gx = len(gx2_gname)
            gx2_gname.append(gname)
            gx2_aif.append(aif)
            if gid is not None:
                # this is not used. It can probably be removed
                gid2_gx[gid] = gx

        print('[ld2] Loading images')
        # Load Image Table
        # <LEGACY CODE>
        if VERBOSE_LOAD_DATA:
            print('[ld2] * Loading image table: %r' % image_table)
        gid_lines = open(image_table, 'r').readlines()
        for line_num, csv_line in enumerate(gid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [
                _.strip(' ') for _ in csv_line.strip('\n\r ').split(',')
            ]
            gid = int(csv_fields[0])
            # You have 3 csv files. Format == gid, gname.ext, aif
            if len(csv_fields) == 3:
                gname = csv_fields[1]
                aif = csv_fields[2].lower() in ['true', '1'
                                                ]  # convert to bool correctly
            # You have 4 csv fields. Format == gid, gname, ext, aif
            if len(csv_fields) == 4:
                gname = '.'.join(csv_fields[1:3])
                aif = csv_fields[3].lower() in ['true', '1']
            add_image(gname, aif, gid)
        nTableImgs = len(gx2_gname)
        fromTableNames = set(gx2_gname)
        if VERBOSE_LOAD_DATA:
            print('[ld2] * table specified %r images' % nTableImgs)
            # </LEGACY CODE>
            # Load Image Directory
            print('[ld2] * Loading image directory: %r' % img_dir)
        nDirImgs = 0
        nDirImgsAlready = 0
        for fname in os.listdir(img_dir):
            if len(fname) > 4 and fname[-4:].lower() in [
                    '.jpg', '.png', '.tiff'
            ]:
                if fname in fromTableNames:
                    nDirImgsAlready += 1
                    continue
                add_image(fname, False, None)
                nDirImgs += 1
        if VERBOSE_LOAD_DATA:
            print('[ld2] * dir specified %r images' % nDirImgs)
            print('[ld2] * %r were already specified in the table' %
                  nDirImgsAlready)
            print('[ld2] * Loaded %r images' % len(gx2_gname))
            print('[ld2] * Done loading images')
    except IOError:
        print('IOError: %r' % ex)
        print('[ld2.img] loading without image table')
        #raise
    except Exception as ex:
        print('[ld2!.img] ERROR %r' % ex)
        #print('[ld2.img] ERROR image_tbl parsing: %s' % (''.join(cid_lines)))
        print('[ld2!.img] ERROR on line number:  %r' % (line_num))
        print('[ld2!.img] ERROR on line:         %r' % (csv_line))
        print('[ld2!.img] ERROR on fields:       %r' % (csv_fields))
        raise

    try:
        # ------------------
        # --- READ CHIPS ---
        # ------------------
        print('[ld2] Loading chip table: %r' % chip_table)
        # Load Chip Table Header
        cid_lines = open(chip_table, 'r').readlines()
        num_data = -1
        # Parse Chip Table Header
        for line_num, csv_line in enumerate(cid_lines):
            #print('[LINE %4d] %r' % (line_num, csv_line))
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0:
                #print('[LINE %4d] BROKEN' % (line_num))
                continue
            csv_line = csv_line.strip('\n')
            if csv_line.find('#') != 0:
                #print('[LINE %4d] BROKEN' % (line_num))
                break  # Break after header
            if re.search(header_csvformat_re, csv_line) is not None:
                #print('[LINE %4d] SEARCH' % (line_num))
                # Specified Header Variables
                if IS_VERSION_1_OR_2:
                    #print(csv_line)
                    end_ = csv_line.find('-')
                    if end_ != -1:
                        end_ = end_ - 1
                        #print('end_=%r' % end_)
                        fieldname = csv_line[5:end_]
                    else:
                        fieldname = csv_line[5:]
                    #print(fieldname)
                    chip_csv_format += [fieldname]

                else:
                    chip_csv_format = [
                        _.strip() for _ in csv_line.strip('#').split(',')
                    ]
                #print('[ld2] read chip_csv_format: %r' % chip_csv_format)
            if csv_line.find(header_numdata) == 0:
                #print('[LINE %4d] NUM_DATA' % (line_num))
                num_data = int(csv_line.replace(header_numdata, ''))
        if IS_VERSION_1_OR_2 and len(chip_csv_format) == 0:
            chip_csv_format = v12_csv_format
        if VERBOSE_LOAD_DATA:
            print('[ld2] * num_chips: %r' % num_data)
            print('[ld2] * chip_csv_format: %r ' % chip_csv_format)
        #print('[ld2.chip] Header Columns: %s\n    ' % '\n   '.join(chip_csv_format))
        cid_x = tryindex(chip_csv_format, 'ChipID', 'imgindex', 'instance_id')
        gid_x = tryindex(chip_csv_format, 'ImgID', 'image_id')
        nid_x = tryindex(chip_csv_format, 'NameID', 'name_id')
        roi_x = tryindex(chip_csv_format, 'roi[tl_x  tl_y  w  h]', 'roi')
        theta_x = tryindex(chip_csv_format, 'theta')
        # new fields
        gname_x = tryindex(chip_csv_format, 'Image', 'original_filepath')
        name_x = tryindex(chip_csv_format, 'Name', 'animal_name')
        required_x = [cid_x, gid_x, gname_x, nid_x, name_x, roi_x, theta_x]
        # Hotspotter Chip Tables
        cx2_cid = []
        cx2_nx = []
        cx2_gx = []
        cx2_roi = []
        cx2_theta = []
        # x is a csv field index in this context
        # get csv indexes which are unknown properties
        prop_x_list = np.setdiff1d(range(len(chip_csv_format)),
                                   required_x).tolist()
        px2_prop_key = [chip_csv_format[x] for x in prop_x_list]
        prop_dict = {}
        for prop in iter(px2_prop_key):
            prop_dict[prop] = []
        if VERBOSE_LOAD_DATA:
            print('[ld2] * num_user_properties: %r' % (len(prop_dict.keys())))
        # Parse Chip Table
        for line_num, csv_line in enumerate(cid_lines):
            csv_line = csv_line.strip('\n\r\t ')
            if len(csv_line) == 0 or csv_line.find('#') == 0:
                continue
            csv_fields = [
                _.strip(' ') for _ in csv_line.strip('\n\r ').split(',')
            ]
            #
            # Load Chip ID
            try:
                cid = int(csv_fields[cid_x])
            except ValueError:
                print('[ld2!] cid_x = %r' % cid_x)
                print('[ld2!] csv_fields = %r' % csv_fields)
                print('[ld2!] csv_fields[cid_x] = %r' % csv_fields[cid_x])
                print(chip_csv_format)
                raise
            #
            # Load Chip ROI Info
            if roi_x != -1:
                roi_str = csv_fields[roi_x].strip('[').strip(']')
                roi = [int(round(float(_))) for _ in roi_str.split()]
            #
            # Load Chip theta Info
            if theta_x != -1:
                theta = float(csv_fields[theta_x])
            else:
                theta = 0
            #
            # Load Image ID/X
            if gid_x != -1:
                gid = int(csv_fields[gid_x])
                gx = gid2_gx[gid]
            elif gname_x != -1:
                gname = csv_fields[gname_x]
                if db_version == 'stripespotter':
                    if not exists(gname):
                        gname = 'img-%07d.jpg' % cid
                        gpath = join(db_dir, 'images', gname)
                        w, h = Image.open(gpath).size
                        roi = [1, 1, w, h]
                try:
                    gx = gx2_gname.index(gname)
                except ValueError:
                    gx = len(gx2_gname)
                    gx2_gname.append(gname)
            #
            # Load Name ID/X
            if nid_x != -1:
                #print('namedbg csv_fields=%r' % csv_fields)
                #print('namedbg nid_x = %r' % nid_x)
                nid = int(csv_fields[nid_x])
                #print('namedbg %r' % nid)
                nx = nid2_nx[nid]
            elif name_x != -1:
                name = csv_fields[name_x]
                try:
                    nx = nx2_name.index(name)
                except ValueError:
                    nx = len(nx2_name)
                    nx2_name.append(name)
            # Append info to cid lists
            cx2_cid.append(cid)
            cx2_gx.append(gx)
            cx2_nx.append(nx)
            cx2_roi.append(roi)
            cx2_theta.append(theta)
            for px, x in enumerate(prop_x_list):
                prop = px2_prop_key[px]
                prop_val = csv_fields[x]
                prop_dict[prop].append(prop_val)
    except Exception as ex:
        print('[chip.ld2] ERROR %r' % ex)
        #print('[chip.ld2] ERROR parsing: %s' % (''.join(cid_lines)))
        print('[chip.ld2] ERROR reading header:  %r' % (line_num))
        print('[chip.ld2] ERROR on line number:  %r' % (line_num))
        print('[chip.ld2] ERROR on line:         %r' % (csv_line))
        print('[chip.ld2] ERROR on fields:       %r' % (csv_fields))
        raise

    if VERBOSE_LOAD_DATA:
        print('[ld2] * Loaded: %r chips' % (len(cx2_cid)))
        print('[ld2] * Done loading chip table')

    # Return all information from load_tables
    #hs_tables.gid2_gx = gid2_gx
    #hs_tables.nid2_nx  = nid2_nx
    hs_tables.init(gx2_gname, gx2_aif, nx2_name, cx2_cid, cx2_nx, cx2_gx,
                   cx2_roi, cx2_theta, prop_dict)

    print('[ld2] Done Loading hotspotter csv tables: %r' % (db_dir))
    if 'vcd' in sys.argv:
        helpers.vd(hs_dirs.computed_dir)
    return hs_dirs, hs_tables, db_version
Example #13
0
def vdg(hs):
    output_dir = get_training_output_dir(hs)
    util.vd(output_dir)