def fullOf(x,istrain,imgid_fmt,dataset):
    '''Map a patch filename to the path of its full source image.

    x         -- patch filename, e.g. '<imgid>_<...>.jpg' (basename only).
    istrain   -- selects the train vs val image directory (COCO only).
    imgid_fmt -- truthy when x starts with a bare numeric image id that
                 cocofmt() expands into the canonical COCO filename.
    dataset   -- 'COCO' or 'pascal'; any other value falls through and
                 returns None (callers appear to always pass one of the two).
    '''
    if dataset == 'COCO':
        s = "train" if istrain else "val"
        d = params.root("{}_images".format(s))
        if imgid_fmt:
            # Leading underscore-delimited field of the patch name is the image id.
            imgnum = x.split("_")[0]
            return os.path.join(d,cocofmt(istrain,imgnum))
        else:
            # Bug fix: was `s.split("_")[0:2] + ".jpg"` -- a list+str TypeError,
            # and it split the wrong variable (s is just "train"/"val").
            # Mirror the pascal branch below: rejoin the first two components
            # of the patch filename. (Also dropped the unused `digits` local.)
            fname = "_".join(x.split("_")[0:2]) + ".jpg"
            return os.path.join(d,fname)
    elif dataset == 'pascal':
        return os.path.join(params.root("val_images/"+ "_".join(x.split("_")[0:2]) + ".jpg"))
def affinity_outfmt(x,splitid,nickname,num_candidates,even,perfect):
    '''Return the path of the pickled affinity kernel for this configuration.

    The filename encodes every argument, underscore-separated, under the
    project-root "kernels" directory.
    '''
    fields = (x, splitid, nickname, num_candidates, even, perfect)
    fname = "_".join("{}".format(field) for field in fields) + ".pkl"
    return params.root("kernels/" + fname)
def inner(tomatch=None,specified=None):
    ''' Train happens with train_ctxpatches. Discovery happens with candidates.
    include_saliency can happen with candidates.
    tomatch is a list of categories; specified, when given, pins the exact
    sample names to draw instead of sampling randomly.

    NOTE(review): this appears to be a module-level copy of the closure
    defined inside sample_img. It reads many names (dataset, N, all_names,
    all_labels, bboxOf, dirname, full_img, full_flag, imgid_fmt,
    include_saliency, val_candidates) that are not parameters -- at module
    level these must resolve to globals. Confirm this copy is still used.
    '''
    if dataset == 'pascal':
        # Saliency maps are only wired up for COCO.
        assert(not include_saliency)
        names,canids,labels,bboxs = [],[],[],[]
        if not tomatch is None:
            # Keep drawing until N samples have usable bounding boxes.
            while len(names) < N:
                i = len(names)
                if specified is None:
                    # With probability negprop draw a random (negative) sample,
                    # otherwise draw one whose label matches tomatch[i].
                    if random.random() < constants.negprop:
                        idx = random.choice(range(len(all_names[0])))
                    else:
                        idx = random.choice(np.where(all_labels == tomatch[i])[0])
                else:
                    # argmax of a boolean mask = index of the first match.
                    idx = np.argmax(all_names == specified[i])
                # For pascal, all_names is a pair (imgnames, canids).
                name,canid,label = all_names[0][idx],all_names[1][idx],all_labels[idx]
                bbox = bboxOf((name,canid))
                if bbox is False:
                    continue  # no usable bbox for this candidate; resample.
                names.append(os.path.join(dirname,name + "_objectness_" + str(canid) + ".jpg"))
                labels.append(label)
                canids.append(canid)
                bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
        else:
            # Unconditioned sampling: uniform over all candidates.
            while len(names) < N:
                idx = random.choice(range(len(all_names[0])))
                name,canid = all_names[0][idx],all_names[1][idx]
                bbox = bboxOf((name,canid))
                if bbox is False:
                    continue
                label = all_labels[idx]
                canids.append(canid)
                names.append(os.path.join(dirname,name + "_objectness_" + str(canid) + ".jpg"))
                labels.append(label)
                bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
        names,labels = np.array(names),np.array(labels)
        imgs = np.array([imread_wrap(os.path.join(dirname,x)) for x in names])
        if full_img:
            # Also load each patch's full source image; imread_wrap scales
            # pixels into [0,1], which the asserts below verify.
            full_imgs = np.array([imread_wrap(fullOf(os.path.split(x)[1],full_flag,imgid_fmt,dataset)) for x in names])
            assert(np.max(imgs) <= 1 and np.min(imgs) >= 0)
            assert(np.max(full_imgs) <= 1 and np.min(full_imgs) >= 0)
            return(imgs,full_imgs,np.array(bboxs).squeeze(),labels,names)
        else:
            return(imgs,labels,names)
    if dataset == 'COCO':
        if include_saliency:
            sal_imgs,imgs,labels = [],[],[]
            conn = sqlite3.connect(params.read_db,timeout=300)
            # Retry whole batches until one succeeds end-to-end; a failure
            # anywhere resets all three lists below.
            while (len(imgs) < N) and (len(sal_imgs) < N):
                candidates = val_candidates.sample(N)
                try: # some of these may fail.
                    for rowid,row in candidates.iterrows():
                        imgs.append(imread_wrap(os.path.join(params.root('val_images'),row['imgname'] + ".jpg")))
                        imgcans = val_candidates[val_candidates['imgname'] == row['imgname']]
                        # canid = positional index of this row among its image's candidates.
                        canid = np.argmax(np.all((row == imgcans).values,axis=1))
                        cat = pd.read_sql("SELECT classname FROM ground_truth WHERE imgname = '{}' AND canid = {}".format(row['imgname'],canid),conn)['classname'].values[0]
                        labels.append(cat)
                        # Crop the saliency map to the candidate box, then resize.
                        sal_imgs.append(imresize(imread(params.root("val_saliency/") + row['imgname'] + ".jpg")[row['miny']:row['maxy'],row['minx']:row['maxx']],(224,224)))
                except:
                    # NOTE(review): bare except -- also swallows KeyboardInterrupt.
                    imgs,labels,sal_imgs = [], [], []
                    continue
            conn.close()
            return(np.array(imgs),np.array(labels),np.array(sal_imgs))
        else:
            names,labels,bboxs = [],[],[]
            if not tomatch is None:
                while len(names) < N:
                    i = len(names)
                    if specified is None:
                        if random.random() < constants.negprop:
                            idx = random.choice(range(len(all_names)))
                        else:
                            idx = random.choice(np.where(all_labels == tomatch[i])[0])
                    else:
                        idx = np.argmax(all_names == specified[i])
                    # For COCO, all_names is a flat array of patch names.
                    name,label = all_names[idx],all_labels[idx]
                    bbox = bboxOf(name)
                    if bbox is False:
                        continue
                    names.append(os.path.join(dirname,name))
                    labels.append(label)
                    bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
            else:
                while len(names) < N:
                    idx = random.choice(range(len(all_names)))
                    name = all_names[idx]
                    bbox = bboxOf(name)
                    if bbox is False:
                        continue
                    label = all_labels[idx]
                    names.append(os.path.join(dirname,name))
                    labels.append(label)
                    bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
            names,labels = np.array(names),np.array(labels)
            imgs = np.array([imread_wrap(os.path.join(dirname,x)) for x in names])
            if full_img:
                full_imgs = np.array([imread_wrap(fullOf(os.path.split(x)[1],full_flag,imgid_fmt,dataset)) for x in names])
                assert(np.max(imgs) <= 1 and np.min(imgs) >= 0)
                assert(np.max(full_imgs) <= 1 and np.min(full_imgs) >= 0)
                return(imgs,full_imgs,np.array(bboxs).squeeze(),labels,names)
            else:
                return(imgs,labels,names)
def sample_img(N,splitid,variety="train_normal",imgname=None,include_saliency=False,val_candidates=None,full_img=True,dataset='COCO'):
    '''Build and return a sampler closure that draws batches of N patches.

    Using nested functions here to avoid repeatedly doing all the work to
    get names of possible things to sample. The variables bound by the
    outer call emulate C's idea of static variables.

    N                -- batch size drawn per call of the returned closure.
    splitid          -- id of the known/unknown category split.
    variety          -- which patch population to sample from.
    imgname          -- unused here; kept for interface compatibility.
    include_saliency -- COCO-only: also return cropped saliency maps.
    val_candidates   -- DataFrame of candidate boxes (saliency path only).
    full_img         -- also return the full source image and bboxes.
    dataset          -- 'COCO' or 'pascal'.

    Returns inner(tomatch=None,specified=None) -> numpy arrays.
    '''
    all_names,all_labels = imgname_cache(splitid,variety,dataset)
    def shapeOf(bbox):
        # Original (height,width) of the image this bbox belongs to.
        return readsql("SELECT height,width FROM imgsize WHERE imgname = '{}'".format(os.path.splitext(os.path.split(bbox['imgname'].ix[0])[1])[0]))
    def boundcond(num):
        # A rescaled coordinate is valid iff it lies inside the 224px frame.
        return (0 <= num <= 224)
    def adjust(bbox):
        # Rescale bbox coordinates into the (224,224) resized frame.
        # NOTE(review): relies on the long-deprecated pandas .ix indexer;
        # this module requires an old pandas version.
        shape = shapeOf(bbox)
        if len(shape) == 0:
            return False  # image size unknown; caller treats False as "skip".
        bbox['miny'],bbox['maxy'] = bbox['miny'].ix[0] * (224 / shape['height'].ix[0]),bbox['maxy'].ix[0] * (224 / shape['height'].ix[0])
        bbox['minx'],bbox['maxx'] = bbox['minx'].ix[0] * (224 / shape['width'].ix[0]),bbox['maxx'].ix[0] * (224 / shape['width'].ix[0])
        # Snap tiny floating-point overshoots back onto the 224 boundary.
        if (bbox['maxy'].ix[0] > 224) and np.allclose(bbox['maxy'].ix[0],224,rtol=1e-2):
            bbox['maxy'].ix[0] = 224
        if (bbox['maxx'].ix[0] > 224) and np.allclose(bbox['maxx'].ix[0],224,rtol=1e-2):
            bbox['maxx'].ix[0] = 224
        try:
            assert(boundcond(bbox['miny'].ix[0]))
            assert(boundcond(bbox['maxy'].ix[0]))
            assert(boundcond(bbox['minx'].ix[0]))
            assert(boundcond(bbox['maxx'].ix[0]))
        except Exception:
            # Deliberate debugging pause: halt so the bad bbox can be
            # inspected interactively. (Was a bare except; narrowed so
            # KeyboardInterrupt still propagates.)
            print("assert fail")
            input()
        return bbox
    if variety in ["train_normal","unseen_normal","testperfect_normal"]:
        if dataset == 'COCO':
            dirname = params.root('train_patches')
        elif dataset == 'pascal':
            dirname = params.root('val_candidateimgs')
        def bboxOf(name,adjusted=True):
            # name is a patchname (COCO) or an (imgname,canid) pair (pascal).
            if dataset == 'COCO':
                bb = readsql("SELECT miny,maxy,minx,maxx,imgname FROM perfect_bbox WHERE patchname = '{}' AND isexpanded = 0".format(os.path.join(dirname,name)))
            else:
                # Bug fix: selected "min AS miny", but candidate_bbox's column
                # is named miny (see the test-variety query below).
                bb = readsql("SELECT miny,maxy,minx,maxx,imgname from candidate_bbox WHERE dataset = 'pascal' AND imgname = '{}' AND canid = {}".format(name[0],name[1]))
            if adjusted:
                bb = adjust(bb)
            return bb
    elif variety == "test":
        def bboxOf(name,adjusted=True):
            # name encodes the candidate id in its 6th underscore field --
            # TODO confirm the exact filename layout against the writer.
            canid = os.path.splitext(name.split("_")[5])[0]
            # Bug fix: was `name.split("_")[0:3] + ".jpg"` -- a list+str
            # TypeError. Rejoin the first three components instead.
            name = "_".join(name.split("_")[0:3]) + ".jpg"
            # imgname added to the select list because adjust() -> shapeOf()
            # reads bb['imgname'] -- TODO confirm against the schema.
            bb = readsql("SELECT miny,maxy,minx,maxx,imgname FROM candidate_bbox WHERE imgname = '{}' AND canid = {}".format(name,canid))
            if adjusted:
                bb = adjust(bb)
            return bb
        if dataset == 'COCO':
            dirname = params.root("val_candidateimgs")
        elif dataset == 'pascal':
            dirname = params.root('val_candidateimgs')
    # full_flag / imgid_fmt drive fullOf() when loading full source images.
    if variety in ["train_normal","unseen_normal","train_ctx","unseen_ctx","train_xlctx","unseen_xlctx"]:
        full_flag,imgid_fmt = True,True
    elif variety in ["testperfect_seen","testperfect_unseen"]:
        full_flag,imgid_fmt = False,True
    elif variety == "test":
        # Bug fix: was `imgid_fmt = False,False`, which left full_flag
        # undefined (NameError later in inner) and bound imgid_fmt to the
        # truthy tuple (False,False), sending fullOf down the wrong branch.
        full_flag,imgid_fmt = False,False
    else:
        print("Unknown variety {}".format(variety))
        sys.exit(1)
    def inner(tomatch=None,specified=None):
        '''Draw one batch of N samples.

        Train happens with train_ctxpatches. Discovery happens with
        candidates. include_saliency can happen with candidates.
        tomatch is a list of categories; specified, when given, pins the
        exact sample names instead of sampling randomly.
        '''
        if dataset == 'pascal':
            # Saliency maps are only wired up for COCO.
            assert(not include_saliency)
            names,canids,labels,bboxs = [],[],[],[]
            if tomatch is not None:
                # Keep drawing until N samples have usable bounding boxes.
                while len(names) < N:
                    i = len(names)
                    if specified is None:
                        # With probability negprop draw a random (negative)
                        # sample, otherwise one whose label matches tomatch[i].
                        if random.random() < constants.negprop:
                            idx = random.choice(range(len(all_names[0])))
                        else:
                            idx = random.choice(np.where(all_labels == tomatch[i])[0])
                    else:
                        # argmax of a boolean mask = index of the first match.
                        idx = np.argmax(all_names == specified[i])
                    # For pascal, all_names is a pair (imgnames, canids).
                    name,canid,label = all_names[0][idx],all_names[1][idx],all_labels[idx]
                    bbox = bboxOf((name,canid))
                    if bbox is False:
                        continue  # no usable bbox; resample.
                    names.append(os.path.join(dirname,name + "_objectness_" + str(canid) + ".jpg"))
                    labels.append(label)
                    canids.append(canid)
                    bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
            else:
                # Unconditioned sampling: uniform over all candidates.
                while len(names) < N:
                    idx = random.choice(range(len(all_names[0])))
                    name,canid = all_names[0][idx],all_names[1][idx]
                    bbox = bboxOf((name,canid))
                    if bbox is False:
                        continue
                    label = all_labels[idx]
                    canids.append(canid)
                    names.append(os.path.join(dirname,name + "_objectness_" + str(canid) + ".jpg"))
                    labels.append(label)
                    bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
            names,labels = np.array(names),np.array(labels)
            imgs = np.array([imread_wrap(os.path.join(dirname,x)) for x in names])
            if full_img:
                # Also load each patch's full source image; imread_wrap
                # scales pixels into [0,1], which the asserts verify.
                full_imgs = np.array([imread_wrap(fullOf(os.path.split(x)[1],full_flag,imgid_fmt,dataset)) for x in names])
                assert(np.max(imgs) <= 1 and np.min(imgs) >= 0)
                assert(np.max(full_imgs) <= 1 and np.min(full_imgs) >= 0)
                return(imgs,full_imgs,np.array(bboxs).squeeze(),labels,names)
            else:
                return(imgs,labels,names)
        if dataset == 'COCO':
            if include_saliency:
                sal_imgs,imgs,labels = [],[],[]
                conn = sqlite3.connect(params.read_db,timeout=300)
                # Retry whole batches until one succeeds end-to-end; any
                # failure resets all three lists and retries.
                while (len(imgs) < N) and (len(sal_imgs) < N):
                    candidates = val_candidates.sample(N)
                    try: # some of these may fail.
                        for rowid,row in candidates.iterrows():
                            imgs.append(imread_wrap(os.path.join(params.root('val_images'),row['imgname'] + ".jpg")))
                            imgcans = val_candidates[val_candidates['imgname'] == row['imgname']]
                            # canid = positional index of this row among its
                            # image's candidates.
                            canid = np.argmax(np.all((row == imgcans).values,axis=1))
                            cat = pd.read_sql("SELECT classname FROM ground_truth WHERE imgname = '{}' AND canid = {}".format(row['imgname'],canid),conn)['classname'].values[0]
                            labels.append(cat)
                            # Crop the saliency map to the candidate box, then resize.
                            sal_imgs.append(imresize(imread(params.root("val_saliency/") + row['imgname'] + ".jpg")[row['miny']:row['maxy'],row['minx']:row['maxx']],(224,224)))
                    except Exception:
                        # Was a bare except; narrowed so Ctrl-C still works.
                        imgs,labels,sal_imgs = [], [], []
                        continue
                conn.close()
                return(np.array(imgs),np.array(labels),np.array(sal_imgs))
            else:
                names,labels,bboxs = [],[],[]
                if tomatch is not None:
                    while len(names) < N:
                        i = len(names)
                        if specified is None:
                            if random.random() < constants.negprop:
                                idx = random.choice(range(len(all_names)))
                            else:
                                idx = random.choice(np.where(all_labels == tomatch[i])[0])
                        else:
                            idx = np.argmax(all_names == specified[i])
                        # For COCO, all_names is a flat array of patch names.
                        name,label = all_names[idx],all_labels[idx]
                        bbox = bboxOf(name)
                        if bbox is False:
                            continue
                        names.append(os.path.join(dirname,name))
                        labels.append(label)
                        bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
                else:
                    while len(names) < N:
                        idx = random.choice(range(len(all_names)))
                        name = all_names[idx]
                        bbox = bboxOf(name)
                        if bbox is False:
                            continue
                        label = all_labels[idx]
                        names.append(os.path.join(dirname,name))
                        labels.append(label)
                        bboxs.append(bbox[['miny','maxy','minx','maxx']].values)
                names,labels = np.array(names),np.array(labels)
                imgs = np.array([imread_wrap(os.path.join(dirname,x)) for x in names])
                if full_img:
                    full_imgs = np.array([imread_wrap(fullOf(os.path.split(x)[1],full_flag,imgid_fmt,dataset)) for x in names])
                    assert(np.max(imgs) <= 1 and np.min(imgs) >= 0)
                    assert(np.max(full_imgs) <= 1 and np.min(full_imgs) >= 0)
                    return(imgs,full_imgs,np.array(bboxs).squeeze(),labels,names)
                else:
                    return(imgs,labels,names)
    return inner
def ctxpath_decode(dname,inclusion="split",dataset='COCO'):
    ''' Decode a patch directory into parallel (names, labels) arrays.

    inclusion="split" means keep only the known classes, inclusion="notsplit"
    means keep the unknown classes, and "all" means keep all classes.
    dname is the directory name containing the imgnames to be found,
    depending on whether using expanded bounding boxes.

    Returns (names, labels): for COCO, patch paths and their class labels;
    for pascal, names is the pair (imgnames, canids).

    NOTE(review): 'splitid' is not a parameter here -- it must resolve to a
    module-level binding; confirm that is intended.
    '''
    if dataset == 'COCO':
        # Directory prefix encodes train/val; second component encodes
        # whether the patches use (xl-)expanded context boxes.
        idir = "train" if dname.split("_")[0] == "train" else "val"
        isexpanded = dname.split("_")[1] in ["ctxpatches","xlctxpatches"]
        isxl = dname.split("_")[1] == "xlctxpatches"
        # Join perfect_bbox to imgsize (matching on the reconstructed image
        # path) and keep boxes spanning more than 1/3 of each image dimension.
        # The ::float casts force real division in SQL (Postgres syntax).
        bboxs = readsql("SELECT * FROM perfect_bbox INNER JOIN imgsize ON perfect_bbox.imgname = '{4}' || imgsize.imgname || '.jpg' WHERE perfect_bbox.patchname LIKE '{0}%' AND ((maxy - miny) / height::float) > {1} AND ((maxx - minx) / width::float) > {1} AND isxl = {2} AND isexpanded = {3}".format(params.root(dname),1/3,int(isxl),int(isexpanded),params.root(idir + "_images/")))
        names = bboxs['patchname'].values
        # Class label is embedded between the first and last underscore
        # fields of the patch basename.
        labels = np.array(["_".join(os.path.split(name)[1].split("_")[1:-1]) for name in names])
        if inclusion == "split":
            which = [(label in params.possible_splits[splitid]['known']) for label in labels]
        elif inclusion == "notsplit":
            which = [(label not in params.possible_splits[splitid]['known']) for label in labels]
        elif inclusion == "all":
            which = [True for label in labels]
        which = np.array(which)
        # further filter by minimum allowed size and whether bbox exists.
        return(names[which],labels[which])
    elif dataset == 'pascal':
        cans = readsql("SELECT * FROM candidate_bbox NATURAL JOIN ground_truth WHERE dataset = 'pascal'") #not yet tested but seems okay.
        # Known categories come from the COCO split, then get renamed into
        # pascal vocabulary via the declared equivalences.
        knowns = readsql("select * from splitcats where dataset = 'COCO' AND splitid = {} AND seen = 1".format(splitid))
        for equiv in datasets.equivalences['coco,pascal']:
            knowns['category'].replace(equiv['coco'],equiv['pascal'],inplace=True)
        mask = cans['classname'].isin(knowns['category'])
        if inclusion == "notsplit":
            mask = np.logical_not(mask)
            print("np.mean(mask)={}".format(np.mean(mask)))
        elif inclusion == "split":
            print("np.mean(mask)={}".format(np.mean(mask)))
        else:
            # inclusion="all" is not supported on the pascal path.
            assert(False)
        # Always drop candidates with no ground-truth class.
        mask = np.logical_and(cans['classname'] != 'None',mask)
        cans = cans[mask]
        cans = cans.reset_index(drop=True)
        return (cans['imgname'],cans['canid']),cans['classname']
def imread_wrap(path,tmp=False):
    '''imread that enforces a (224,224,3) RGB output scaled into [0,1].

    path -- image file to read.
    tmp  -- if True, don't do any caching: read, resize and return directly.

    Cached reads store the resized image next to the source directory (or in
    a known "square" mirror directory) so repeat loads skip the resize.
    '''
    if tmp:
        img = imread(path)
        if len(img.shape) == 2:
            img = gray2rgb(img)
        # Bug fix: this branch previously fell through into the caching
        # logic below, so tmp=True still consulted/populated the cache.
        return img_as_float(imresize(img,(224,224)))
    d,f = os.path.split(path)
    # Map known image directories to their pre-resized cache directories.
    if d == params.root("val_images"):
        quickpath = params.root("val_squareimgs")
    elif d == params.root("val_patches"):
        quickpath = params.root("val_squarepatches")
    elif d == params.root("train_images"):
        quickpath = params.root("train_squareimgs")
    elif d == params.root("train_patches"):
        quickpath = params.root("train_squarepatches")
    elif d == params.root("val_candidateimgs"):
        quickpath = params.root("val_squarecandidateimgs")
    elif d == params.root("debug"):
        quickpath = params.root("train_quickdebug")
    else:
        # Unknown directory: cache beside it.
        quickpath = d + '-cache'
    qname = os.path.join(quickpath,f)
    # Replaces subprocess.call(["mkdir", ...]): race-safe and creates parents.
    os.makedirs(quickpath,exist_ok=True)
    if not os.path.exists(qname):
        img = imread(path)
        if len(img.shape) == 2:
            img = gray2rgb(img)  # promote grayscale to 3-channel RGB.
        img = img_as_float(imresize(img,(224,224)))
        imsave(qname,img)
        return img
    else:
        return img_as_float(imread(qname))