Example #1
def fullOf(x,istrain,imgid_fmt,dataset):
    if dataset == 'COCO':
        s = "train" if istrain else "val"
        d = params.root("{}_images".format(s))
        if imgid_fmt:
            imgnum = x.split("_")[0]
            digits = len(str(imgnum))
            return os.path.join(d,cocofmt(istrain,imgnum))
        else:
            # derive the source-image filename from the patch name
            fname = "_".join(x.split("_")[0:2]) + ".jpg"
            return os.path.join(d,fname)
    elif dataset == 'pascal':
        return os.path.join(params.root("val_images/"+ "_".join(x.split("_")[0:2]) + ".jpg"))
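For illustration, a hypothetical call on the pascal branch (the filename and argument values below are invented; the COCO branch additionally relies on cocofmt, which is defined elsewhere in the repository) resolves like this:

# hypothetical patch filename; returns params.root("val_images/2008_000123.jpg")
fullOf("2008_000123_objectness_7.jpg", istrain=False, imgid_fmt=False, dataset='pascal')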
Example #2
def affinity_outfmt(x,splitid,nickname,num_candidates,even,perfect):
    return params.root("kernels/{}_{}_{}_{}_{}_{}.pkl".format(x,splitid,nickname,num_candidates,even,perfect))
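A hypothetical call (argument values chosen only for illustration) simply fills the six slots of the format string:

# yields params.root("kernels/raw_3_vgg_100_True_False.pkl")
affinity_outfmt("raw", 3, "vgg", 100, True, False)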
Example #3
def sample_img(N,splitid,variety="train_normal",imgname=None,include_saliency=False,val_candidates=None,full_img=True,dataset='COCO'):
    '''
    Nested functions are used here to avoid repeatedly doing all the work of gathering the names of possible things to sample.
    The variables bound by the outer function call act as 'static' state, emulating C's idea of static variables.
    '''
    all_names,all_labels = imgname_cache(splitid,variety,dataset)
    def shapeOf(bbox):
        return readsql("SELECT height,width FROM imgsize WHERE imgname = '{}'".format(os.path.splitext(os.path.split(bbox['imgname'].ix[0])[1])[0]))
    def boundcond(num):
        return (0 <= num <= 224)
    def adjust(bbox):
        shape = shapeOf(bbox)
        if len(shape) == 0:
            return False
        bbox['miny'],bbox['maxy'] = bbox['miny'].ix[0] * (224 / shape['height'].ix[0]),bbox['maxy'].ix[0] * (224 / shape['height'].ix[0])
        bbox['minx'],bbox['maxx'] = bbox['minx'].ix[0] * (224 / shape['width'].ix[0]),bbox['maxx'].ix[0] * (224 / shape['width'].ix[0])
        if (bbox['maxy'].ix[0] > 224) and np.allclose(bbox['maxy'].ix[0],224,rtol=1e-2):
            bbox['maxy'].ix[0] = 224
        if (bbox['maxx'].ix[0] > 224) and np.allclose(bbox['maxx'].ix[0],224,rtol=1e-2):
            bbox['maxx'].ix[0] = 224
        try:
            assert(boundcond(bbox['miny'].ix[0]))
            assert(boundcond(bbox['maxy'].ix[0]))
            assert(boundcond(bbox['minx'].ix[0]))
            assert(boundcond(bbox['maxx'].ix[0]))
        except:
            print("assert fail")
            input()
        return bbox
    # I think I need to add 
    if variety in ["train_normal","unseen_normal","testperfect_normal"]:
        if dataset == 'COCO':
            dirname = params.root('train_patches')
        elif dataset == 'pascal':
            dirname = params.root('val_candidateimgs')
        def bboxOf(name,adjusted=True):
            if dataset == 'COCO':
                bb = readsql("SELECT miny,maxy,minx,maxx,imgname FROM perfect_bbox WHERE patchname = '{}' AND isexpanded = 0".format(os.path.join(dirname,name)))
            else:
                bb = readsql("SELECT min AS miny,maxy,minx,maxx,imgname from candidate_bbox WHERE dataset = 'pascal' AND imgname = '{}' AND canid = {}".format(name[0],name[1]))
            if adjusted:
                bb = adjust(bb)
            return bb
    elif variety == "test":
        def bboxOf(name,adjusted=True):
            canid = os.path.splitext(name.split("_")[5])[0]
            name = "_".join(name.split("_")[0:3]) + ".jpg"
            bb = readsql("SELECT miny,maxy,minx,maxx FROM candidate_bbox WHERE imgname = '{}' AND canid = {}".format(name,canid))
            if adjusted:
                bb = adjust(bb)
            return bb
        if dataset == 'COCO':
            dirname = params.root("val_candidateimgs")
        elif dataset == 'pascal':
            dirname = params.root('val_candidateimgs')
    if variety in ["train_normal","unseen_normal","train_ctx","unseen_ctx","train_xlctx","unseen_xlctx"]:
        full_flag,imgid_fmt = True,True
    elif variety in ["testperfect_seen","testperfect_unseen"]:
        full_flag,imgid_fmt = False,True
    elif variety == "test":
        full_flag,imgid_fmt = False,False
    else:
        print("Unknown variety {}".format(variety))
        sys.exit(1)
    def inner(tomatch=None,specified=None):
        '''
        Train happens with train_ctxpatches.
        Discovery happens with candidates. include_saliency can happen with candidates.
        tomatch is a list of categories.
        '''
        if dataset == 'pascal':
            assert(not include_saliency)
            names,canids,labels,bboxs = [],[],[],[]
            if tomatch is not None:
                while len(names) < N:
                    i = len(names)
                    if specified is None:
                        if random.random() < constants.negprop:
                            idx = random.choice(range(len(all_names[0])))
                        else:
                            idx = random.choice(np.where(all_labels == tomatch[i])[0])
                    else:
                        idx = np.argmax(all_names == specified[i])
                    name,canid,label = all_names[0][idx],all_names[1][idx],all_labels[idx]
                    bbox = bboxOf((name,canid))
                    if bbox is False:
                        continue
                    names.append(os.path.join(dirname,name + "_objectness_" + str(canid) + ".jpg"))
                    labels.append(label)
                    canids.append(canid)
                    bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
            else:
                while len(names) < N:
                    idx = random.choice(range(len(all_names[0])))
                    name,canid = all_names[0][idx],all_names[1][idx]
                    bbox = bboxOf((name,canid))
                    if bbox is False:
                        continue
                    label = all_labels[idx]
                    canids.append(canid)
                    names.append(os.path.join(dirname,name + "_objectness_" + str(canid) + ".jpg"))
                    labels.append(label)
                    bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
            names,labels = np.array(names),np.array(labels)
            imgs = np.array([imread_wrap(os.path.join(dirname,x)) for x in names])
            if full_img:
                full_imgs = np.array([imread_wrap(fullOf(os.path.split(x)[1],full_flag,imgid_fmt,dataset)) for x in names])
                assert(np.max(imgs) <= 1 and np.min(imgs) >= 0)
                assert(np.max(full_imgs) <= 1 and np.min(full_imgs) >= 0)
                return(imgs,full_imgs,np.array(bboxs).squeeze(),labels,names)
            else:
                return(imgs,labels,names)
        if dataset == 'COCO':
            if include_saliency:
                sal_imgs,imgs,labels = [],[],[]
                conn = sqlite3.connect(params.read_db,timeout=300)
                while (len(imgs) < N) and (len(sal_imgs) < N):
                    candidates = val_candidates.sample(N)
                    try: # some of these may fail.
                        for rowid,row in candidates.iterrows():
                            imgs.append(imread_wrap(os.path.join(params.root('val_images'),row['imgname'] + ".jpg")))
                            imgcans = val_candidates[val_candidates['imgname'] == row['imgname']]
                            canid = np.argmax(np.all((row == imgcans).values,axis=1))
                            cat = pd.read_sql("SELECT classname FROM ground_truth WHERE imgname = '{}' AND canid = {}".format(row['imgname'],canid),conn)['classname'].values[0]
                            labels.append(cat)
                            sal_imgs.append(imresize(imread(params.root("val_saliency/") + row['imgname'] + ".jpg")[row['miny']:row['maxy'],row['minx']:row['maxx']],(224,224)))
                    except:
                        imgs,labels,sal_imgs = [], [], []
                        continue
                conn.close()
                return(np.array(imgs),np.array(labels),np.array(sal_imgs))
            else:
                names,labels,bboxs = [],[],[]
                if tomatch is not None:
                    while len(names) < N:
                        i = len(names)
                        if specified is None:
                            if random.random() < constants.negprop:
                                idx = random.choice(range(len(all_names)))
                            else:
                                idx = random.choice(np.where(all_labels == tomatch[i])[0])
                        else:
                            idx = np.argmax(all_names == specified[i])
                        name,label = all_names[idx],all_labels[idx]
                        bbox = bboxOf(name)
                        if bbox is False:
                            continue
                        names.append(os.path.join(dirname,name))
                        labels.append(label)
                        bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
                else:
                    while len(names) < N:
                        idx = random.choice(range(len(all_names)))
                        name = all_names[idx]
                        bbox = bboxOf(name)
                        if bbox is False:
                            continue
                        label = all_labels[idx]
                        names.append(os.path.join(dirname,name))
                        labels.append(label)
                        bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
                names,labels = np.array(names),np.array(labels)
                imgs = np.array([imread_wrap(os.path.join(dirname,x)) for x in names])
                if full_img:
                    full_imgs = np.array([imread_wrap(fullOf(os.path.split(x)[1],full_flag,imgid_fmt,dataset)) for x in names])
                    assert(np.max(imgs) <= 1 and np.min(imgs) >= 0)
                    assert(np.max(full_imgs) <= 1 and np.min(full_imgs) >= 0)
                    return(imgs,full_imgs,np.array(bboxs).squeeze(),labels,names)
                else:
                    return(imgs,labels,names)
    return inner
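The docstring above describes a closure pattern: the expensive lookup of sampleable names happens once in the outer call, and the returned inner function reuses that bound state on every invocation. A minimal, self-contained sketch of the same idea (the names make_sampler, pool, and prepared are invented for illustration, not taken from the repository):

import random

def make_sampler(pool):
    # expensive setup happens once, at closure-creation time
    prepared = sorted(pool)
    def inner(k=1):
        # 'prepared' is bound by the enclosing call, much like a C static variable
        return random.sample(prepared, k)
    return inner

sampler = make_sampler(["cat", "dog", "horse", "sheep"])
print(sampler(2))  # draws from the cached pool
print(sampler(2))  # no re-preparation on later calls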
Example #4
    def ctxpath_decode(dname,inclusion="split",dataset='COCO'):
        '''
        inclusion selects which classes to keep: "split" keeps only the known classes, "notsplit" keeps only the unknown classes, and "all" keeps everything.

        dname is the directory containing the image names to look up; it differs depending on whether expanded bounding boxes are used.
        '''
        if dataset == 'COCO':
            idir = "train" if dname.split("_")[0] == "train" else "val"
            isexpanded = dname.split("_")[1] in ["ctxpatches","xlctxpatches"]
            isxl = dname.split("_")[1] == "xlctxpatches"
            #bboxs = readsql("SELECT * FROM perfect_bbox INNER JOIN imgsize ON perfect_bbox.imgname = '{4}' || imgsize.imgname || '.jpg' WHERE perfect_bbox.patchname LIKE '{0}%' AND ((maxy - miny) / height) > {1} AND ((maxx - minx) / width) > {1} AND isxl = {2} AND isexpanded = {3}".format(params.root(dname),1/3,int(isxl),int(isexpanded),params.root(idir + "_images/")))
            bboxs = readsql("SELECT * FROM perfect_bbox INNER JOIN imgsize ON perfect_bbox.imgname = '{4}' || imgsize.imgname || '.jpg' WHERE perfect_bbox.patchname LIKE '{0}%' AND ((maxy - miny) / height::float) > {1} AND ((maxx - minx) / width::float) > {1} AND isxl = {2} AND isexpanded = {3}".format(params.root(dname),1/3,int(isxl),int(isexpanded),params.root(idir + "_images/")))
            names = bboxs['patchname'].values
            labels = np.array(["_".join(os.path.split(name)[1].split("_")[1:-1]) for name in names])
            if inclusion == "split":
                which = [(label in params.possible_splits[splitid]['known']) for label in labels]
            elif inclusion == "notsplit":
                which = [(label not in params.possible_splits[splitid]['known']) for label in labels]
            elif inclusion == "all":
                which = [True for label in labels]
            which = np.array(which)
            # further filter by minimum allowed size and whether bbox exists.
            return(names[which],labels[which])
        elif dataset == 'pascal':
            cans = readsql("SELECT * FROM candidate_bbox NATURAL JOIN ground_truth WHERE dataset = 'pascal'") #not yet tested but seems okay.
            knowns = readsql("select * from splitcats where dataset = 'COCO' AND splitid = {} AND seen = 1".format(splitid))
            for equiv in datasets.equivalences['coco,pascal']:
                knowns['category'].replace(equiv['coco'],equiv['pascal'],inplace=True)
            mask = cans['classname'].isin(knowns['category'])
            if inclusion == "notsplit":
                mask = np.logical_not(mask)
                print("np.mean(mask)={}".format(np.mean(mask)))
            elif inclusion == "split":
                print("np.mean(mask)={}".format(np.mean(mask)))
            else:
                assert(False)
            mask = np.logical_and(cans['classname'] != 'None',mask)
            cans = cans[mask]
            cans = cans.reset_index(drop=True)
            return (cans['imgname'],cans['canid']),cans['classname']
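On the pascal branch the known/unknown filtering reduces to an isin mask that is optionally negated and then combined with a filter dropping candidates whose classname is 'None'. A self-contained sketch of that masking step on toy data (the DataFrames below are invented, not the repository's tables):

import numpy as np
import pandas as pd

cans = pd.DataFrame({"imgname": ["a", "b", "c", "d"],
                     "classname": ["cat", "None", "sofa", "dog"]})
knowns = pd.DataFrame({"category": ["cat", "dog"]})

inclusion = "notsplit"
mask = cans["classname"].isin(knowns["category"])
if inclusion == "notsplit":
    mask = np.logical_not(mask)  # keep only classes outside the known split
mask = np.logical_and(cans["classname"] != "None", mask)  # drop unlabeled candidates
print(cans[mask].reset_index(drop=True))  # only the 'sofa' row survives here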
Example #5
def imread_wrap(path,tmp=False):
    '''imread that enforces a (224,224,3) RGB output.
       If tmp is True, don't do any caching.
    '''
    if tmp:
        # no caching: read, promote grayscale to RGB, resize, and return directly
        img = imread(path)
        if len(img.shape) == 2:
            img = gray2rgb(img)
        return img_as_float(imresize(img,(224,224)))
    d,f = os.path.split(path)
    if d == params.root("val_images"):
        quickpath = params.root("val_squareimgs")
    elif d == params.root("val_patches"):
        quickpath = params.root("val_squarepatches")
    elif d == params.root("train_images"):
        quickpath = params.root("train_squareimgs")
    elif d == params.root("train_patches"):
        quickpath = params.root("train_squarepatches")
    elif d == params.root("val_candidateimgs"):
        quickpath = params.root("val_squarecandidateimgs")
    elif d == params.root("debug"):
        quickpath = params.root("train_quickdebug")
    else:
        quickpath = d + '-cache'
    qname = os.path.join(quickpath,f)
    if not os.path.exists(quickpath):
        os.makedirs(quickpath,exist_ok=True)
    if not os.path.exists(qname):
        img = imread(path)
        if len(img.shape) == 2:
            img = gray2rgb(img)
        img = img_as_float(imresize(img,(224,224)))
        imsave(qname,img)
        return img
    else:
        return img_as_float(imread(qname))
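imread and imresize here come from the old scipy.misc API; a roughly equivalent normalization step written against current scikit-image calls might look like the sketch below (load_square is an invented name, and this is an assumption about intent rather than the repository's code):

import skimage.io
from skimage import img_as_float
from skimage.color import gray2rgb
from skimage.transform import resize

def load_square(path, size=(224, 224)):
    # read, promote grayscale to RGB, resize to 224x224, and return floats in [0, 1]
    img = skimage.io.imread(path)
    if img.ndim == 2:
        img = gray2rgb(img)
    return img_as_float(resize(img, size))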