def getScoreForHashVal(args):
    """Score one hash bin for each requested class, holding out one video.

    Takes a single 6-tuple so it can be handed straight to ``Pool.map``
    (see ``getScoreForIdx``):
    ``(path_to_db, hash_table, hash_val, class_idx, video_idx, class_idx_gt)``.
    The tuple is unpacked explicitly instead of in the signature, so callers
    still pass exactly one positional tuple.

    All rows falling in bin ``(hash_table, hash_val)`` are fetched. Rows
    whose class is ``class_idx_gt`` and whose video is ``video_idx`` are
    removed from the denominator, and from the numerator of the
    ground-truth class only.

    Args:
        args: the 6-tuple described above. ``class_idx`` may be a single
            class index or an iterable of class indices.

    Returns:
        A list with one score per requested class:
        ``(rows of that class - held-out rows if it is the ground-truth
        class) / (all rows - held-out rows)``.
    """
    (path_to_db, hash_table, hash_val, class_idx, video_idx,
     class_idx_gt) = args

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        vals = mani.selectMix(
            toSelect=(Tube.class_idx_pascal, Tube.video_id),
            criterion=(TubeHash.hash_table == hash_table,
                       TubeHash.hash_val == hash_val))
    finally:
        # Release the session even when the query raises.
        mani.closeSession()
    vals = np.array(vals)

    if not hasattr(class_idx, '__iter__'):
        class_idx = [class_idx]

    class_col = vals[:, 0]
    held_out = np.sum(
        np.logical_and(class_col == class_idx_gt, vals[:, 1] == video_idx))
    # The denominator does not depend on the class being scored.
    total_count = vals.shape[0] - held_out

    scores = []
    for class_idx_curr in class_idx:
        class_count = np.sum(class_col == class_idx_curr)
        if class_idx_curr == class_idx_gt:
            class_count = class_count - held_out
        scores.append(class_count / float(total_count))
    return scores
def script_verifyRecordedScoreMatchesDBScore(params):
    """Recompute a patch score from the DB and assert it equals the stored one.

    For every (hash_table, hash_val) bin of the patch at ``params.img_path``
    the per-bin class counts are read from
    ``<path_to_hash>/<hash_table>_<hash_val>_counts.p``. The contribution of
    the patch's own shot is subtracted from both the bin count and the class
    total, and the mean of the resulting ratios is compared against
    ``params.score_file``.

    The shot's hash rows are fetched through ``mani.selectMix`` with the
    TubeHash columns. The previous code referenced an undefined
    ``mani_hash`` and selected ``Tube.idx``, which could never match the
    (hash_table, hash_val) pairs being counted.

    Args:
        params: object exposing ``path_to_db``, ``path_to_hash``,
            ``total_class_counts``, ``img_path``, ``video_id``, ``shot_id``,
            ``class_idx`` and ``score_file``.

    Raises:
        AssertionError: if the recomputed score is not close to
            ``params.score_file``.
    """
    path_to_db = params.path_to_db
    path_to_hash = params.path_to_hash
    total_class_counts = params.total_class_counts
    img_path = params.img_path
    video_id = params.video_id
    shot_id = params.shot_id
    class_idx = params.class_idx
    score_file = params.score_file

    shot_criterion = (Tube.class_idx_pascal == class_idx,
                      Tube.video_id == video_id,
                      Tube.shot_id == shot_id)

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        total_shot_patches = mani.count((Tube.idx, ), shot_criterion)
        # Hash bins of every patch in the shot.
        hash_info_all = mani.selectMix(
            (TubeHash.hash_table, TubeHash.hash_val), shot_criterion)
    finally:
        mani.closeSession()
    # Plain tuples so that list.count compares by value.
    hash_info_all = [tuple(row) for row in hash_info_all]

    # Hash bins of the patch itself.
    hash_info_patch = getHashInfoForImg(path_to_db, img_path)

    deno = float(total_class_counts[class_idx] - total_shot_patches)
    hash_scores_patch = []
    for hash_info_curr in hash_info_patch:
        hash_info_curr = tuple(hash_info_curr)
        hash_file_curr = os.path.join(
            path_to_hash,
            str(hash_info_curr[0]) + '_' + str(hash_info_curr[1]) +
            '_counts.p')
        with open(hash_file_curr, 'rb') as f:
            hash_bin_class_counts = pickle.load(f)
        numo = (hash_bin_class_counts[class_idx] -
                hash_info_all.count(hash_info_curr))
        hash_scores_patch.append(numo / deno)

    score_db = np.mean(hash_scores_patch)
    print('%s %s %s' % (len(hash_scores_patch), score_db, score_file))
    assert np.isclose(score_db, score_file)
def script_saveHashAnalysisImages(params):
    """Render hash-bin analysis images and per-class cumulative graphs.

    The (class index, hash value) pairs of one hash table are cached in
    ``<out_file_class_pre>.npz``; the DB is only queried when that cache does
    not exist yet. Bins are ordered by ``getDiscriminativeScore`` and drawn
    twice with ``getHashAnalysisIm`` (plain and coloured by class). Then one
    cumulative-frequency graph is written per entry of ``class_labels_map``
    to ``<out_file_class_pre>_<class name>.png``.

    Args:
        params: object exposing ``path_to_db``, ``class_labels_map``
            (iterable of ``(class name, class index)`` pairs), ``percents``,
            ``out_file_class_pre``, ``out_file_hash_simple``,
            ``out_file_hash_byClass``, ``hashtable``, ``inc`` and ``dtype``.
    """
    path_to_db = params.path_to_db
    class_labels_map = params.class_labels_map
    percents = params.percents
    out_file_class_pre = params.out_file_class_pre
    out_file_hash_simple = params.out_file_hash_simple
    out_file_hash_byClass = params.out_file_hash_byClass
    hashtable = params.hashtable
    inc = params.inc
    dtype = params.dtype

    cache_file = out_file_class_pre + '.npz'
    if not os.path.exists(cache_file):
        mani = Tube_Manipulator(path_to_db)
        mani.openSession()
        try:
            ids = mani.selectMix((Tube.class_idx_pascal, TubeHash.hash_val),
                                 (TubeHash.hash_table == hashtable, ))
        finally:
            mani.closeSession()
        np.savez(out_file_class_pre, np.array(ids, dtype=dtype))

    # Always read back from the cache so both paths see the same data.
    cache = np.load(cache_file)
    try:
        ids = cache['arr_0']
    finally:
        cache.close()

    counts_all, class_ids_breakdown = getClassIdsCount(ids[:, 0], ids[:, 1])
    sort_idx = np.argsort(getDiscriminativeScore(counts_all))
    counts_all = [counts_all[idx] for idx in sort_idx]
    class_ids_breakdown = [class_ids_breakdown[idx] for idx in sort_idx]

    im_simple = getHashAnalysisIm(counts_all, class_ids_breakdown, inc=inc,
                                  colorByClass=False)
    im_byClass = getHashAnalysisIm(counts_all, class_ids_breakdown, inc=inc,
                                   colorByClass=True)
    visualize.saveMatAsImage(im_simple, out_file_hash_simple)
    visualize.saveMatAsImage(im_byClass, out_file_hash_byClass)

    counts_all_ravel = np.array([c for counts in counts_all for c in counts])
    class_ids_breakdown_ravel = np.array(
        [c for class_ids in class_ids_breakdown for c in class_ids])

    for class_name, class_id in class_labels_map:
        frequency = counts_all_ravel[class_ids_breakdown_ravel == class_id]
        out_file = out_file_class_pre + '_' + class_name + '.png'
        title = class_name + ' ' + str(class_id)
        cum_freq, idx_perc = getCumulativeInfo(frequency, percents)
        savePerClassCumulativeGraph(cum_freq / float(cum_freq[-1]), idx_perc,
                                    percents, out_file, title)
def getScoreForIdx(table_idx,
                   path_to_db,
                   class_idx_pascal=None,
                   npz_path=None,
                   n_jobs=1,
                   total_counts=None):
    """Score every hash bin that the patch ``table_idx`` falls into.

    Args:
        table_idx: value of ``Tube.idx`` identifying the patch.
        path_to_db: database URL handed to the manipulators.
        class_idx_pascal: class (or classes) to score against; defaults to
            the patch's own class from the DB.
        npz_path: when given, bins are scored with
            ``getScoreForHashValFromNpz`` instead of ``getScoreForHashVal``.
        n_jobs: values above 1 score the bins in a process pool of at most
            ``multiprocessing.cpu_count()`` workers.
        total_counts: forwarded to ``getScoreForHashValFromNpz`` only.

    Returns:
        ``(scores, class_idx_gt, frame_path)`` where ``scores`` has one
        entry per hash row of the patch, ``class_idx_gt`` is the patch's
        class in the DB and ``frame_path`` is its ``Tube.img_path``.

    Raises:
        ValueError: if ``table_idx`` does not match exactly one Tube row
            (the single-row unpacking fails).
    """
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        mani_hash = TubeHash_Manipulator(path_to_db)
        mani_hash.openSession()
        try:
            [(class_idx_gt, video_idx, frame_path)] = mani.select(
                (Tube.class_idx_pascal, Tube.video_id, Tube.img_path),
                (Tube.idx == table_idx, ))
            hash_table_info = mani_hash.select(
                (TubeHash.hash_table, TubeHash.hash_val),
                (TubeHash.idx == table_idx, ))
            print(len(hash_table_info))
        finally:
            mani_hash.closeSession()
    finally:
        mani.closeSession()

    if class_idx_pascal is not None:
        class_idx = class_idx_pascal
    else:
        class_idx = class_idx_gt

    if npz_path is not None:
        worker = getScoreForHashValFromNpz
        args = [(npz_path, hash_table, hash_val, class_idx, video_idx,
                 class_idx_gt, total_counts)
                for hash_table, hash_val in hash_table_info]
    else:
        worker = getScoreForHashVal
        args = [(path_to_db, hash_table, hash_val, class_idx, video_idx,
                 class_idx_gt)
                for hash_table, hash_val in hash_table_info]

    if n_jobs > 1:
        pool = multiprocessing.Pool(min(multiprocessing.cpu_count(), n_jobs))
        try:
            scores = pool.map(worker, args)
        finally:
            # Do not leave worker processes behind.
            pool.close()
            pool.join()
    else:
        scores = [worker(arg) for arg in args]
    return scores, class_idx_gt, frame_path
def getScoreForHashVal(args):
    """Score one hash bin for each requested class, holding out one video.

    Takes a single 6-tuple so it can be handed straight to ``Pool.map``
    (see ``getScoreForIdx``):
    ``(path_to_db, hash_table, hash_val, class_idx, video_idx, class_idx_gt)``.
    The tuple is unpacked explicitly instead of in the signature, so callers
    still pass exactly one positional tuple.

    All rows falling in bin ``(hash_table, hash_val)`` are fetched. Rows
    whose class is ``class_idx_gt`` and whose video is ``video_idx`` are
    removed from the denominator, and from the numerator of the
    ground-truth class only.

    Args:
        args: the 6-tuple described above. ``class_idx`` may be a single
            class index or an iterable of class indices.

    Returns:
        A list with one score per requested class:
        ``(rows of that class - held-out rows if it is the ground-truth
        class) / (all rows - held-out rows)``.
    """
    (path_to_db, hash_table, hash_val, class_idx, video_idx,
     class_idx_gt) = args

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        vals = mani.selectMix(
            toSelect=(Tube.class_idx_pascal, Tube.video_id),
            criterion=(TubeHash.hash_table == hash_table,
                       TubeHash.hash_val == hash_val))
    finally:
        # Release the session even when the query raises.
        mani.closeSession()
    vals = np.array(vals)

    if not hasattr(class_idx, '__iter__'):
        class_idx = [class_idx]

    class_col = vals[:, 0]
    held_out = np.sum(
        np.logical_and(class_col == class_idx_gt, vals[:, 1] == video_idx))
    # The denominator does not depend on the class being scored.
    total_count = vals.shape[0] - held_out

    scores = []
    for class_idx_curr in class_idx:
        class_count = np.sum(class_col == class_idx_curr)
        if class_idx_curr == class_idx_gt:
            class_count = class_count - held_out
        scores.append(class_count / float(total_count))
    return scores
def getInfoForFeatureExtractionForVideo(path_to_db, video_info, numberOfFrames):
    """Collect up to ``numberOfFrames`` distinct rows for each listed video.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        video_info: mapping from pascal class id to the video ids to fetch
            for that class.
        numberOfFrames: ``limit`` applied to each per-video query.

    Returns:
        A flat list of ``(img_path, class_id_pascal, deep_features_path,
        deep_features_idx)`` rows, concatenated in iteration order.
    """
    columns = (Tube.img_path, Tube.class_id_pascal, Tube.deep_features_path,
               Tube.deep_features_idx)
    info_for_extraction = []
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        for pascal_id, video_ids in video_info.items():
            for video_id in video_ids:
                info = mani.select(columns,
                                   (Tube.video_id == video_id,
                                    Tube.class_id_pascal == pascal_id),
                                   distinct=True,
                                   limit=numberOfFrames)
                # extend() avoids rebuilding the whole list on every query.
                info_for_extraction.extend(info)
    finally:
        mani.closeSession()
    return info_for_extraction
def getShotFrameCount(path_to_db, class_idx, video_id, shot_id):
    """Count the Tube rows belonging to one shot.

    Args:
        path_to_db: either a database URL string, in which case a session is
            opened and closed here, or an already-open manipulator object,
            which is used as-is and left open for the caller.
        class_idx: value matched against ``Tube.class_idx_pascal``.
        video_id: value matched against ``Tube.video_id``.
        shot_id: value matched against ``Tube.shot_id``.

    Returns:
        Whatever ``mani.count`` returns for ``Tube.idx`` under those three
        filters.
    """
    # isinstance also accepts unicode URLs and str subclasses, which the old
    # ``type(x) == str`` test would have treated as a manipulator object.
    owns_session = isinstance(path_to_db, (str, type(u'')))
    if owns_session:
        mani = Tube_Manipulator(path_to_db)
        mani.openSession()
    else:
        mani = path_to_db
    try:
        return mani.count((Tube.idx, ),
                          (Tube.class_idx_pascal == class_idx,
                           Tube.video_id == video_id,
                           Tube.shot_id == shot_id))
    finally:
        # Only close what was opened here.
        if owns_session:
            mani.closeSession()
def getNVideosByPascalIds(path_to_db, pascal_ids, numberofVideos):
    """Return up to ``numberofVideos`` distinct video ids per pascal class id.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        pascal_ids: iterable of values matched against
            ``Tube.class_id_pascal``.
        numberofVideos: ``limit`` applied to each per-class query.

    Returns:
        dict mapping each pascal id to a list of video ids, in the order
        the query returned them.
    """
    dict_out = {}
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        for pascal_id in pascal_ids:
            rows = mani.select((Tube.video_id, ),
                               (Tube.class_id_pascal == pascal_id, ),
                               distinct=True,
                               limit=numberofVideos)
            dict_out[pascal_id] = [row[0] for row in rows]
    finally:
        mani.closeSession()
    return dict_out
def main():
    """One-off loader that fills the TubeHash table from ``*_hash.npy`` files.

    Currently disabled: the function returns on its first statement, so
    nothing below it runs. The loader is kept for reference.
    """
    return

    db_url = 'sqlite://///disk2/novemberExperiments/experiments_youtube/patches_nn_hash.db'
    tube_db = Tube_Manipulator(db_url)
    hash_db = TubeHash_Manipulator(db_url)

    tube_db.openSession()
    feature_files = [
        row[0]
        for row in tube_db.select((Tube.deep_features_path, ), distinct=True)
    ]
    print(len(feature_files))

    hash_db.openSession()
    # The first 11 feature files are skipped.
    for file_no, feature_file in enumerate(feature_files[11:]):
        hash_file = feature_file[:-4] + '_hash.npy'
        print(hash_file)
        patch_rows = tube_db.select(
            (Tube.idx, Tube.deep_features_idx),
            (Tube.deep_features_path == feature_file, ))
        hash_vals = np.load(hash_file)
        for idx_foreign, row in patch_rows:
            for hash_table, hash_val in enumerate(hash_vals[row]):
                hash_db.insert(idx=idx_foreign,
                               hash_table=hash_table,
                               hash_val=int(hash_val),
                               commit=False)
        # Inserts are batched; commit once every ten feature files.
        if file_no % 10 == 0:
            hash_db.session.commit()

    hash_db.closeSession()
    tube_db.closeSession()
def saveTotalClassBreakdowns(path_to_db, out_file):
    """Pickle per-class counts of distinct videos, shots and tubes.

    Every distinct ``(class_idx_pascal, video_id, shot_id, tube_id)`` row is
    fetched, handed to ``getClassCountsByIdType`` with the column names
    ``['video', 'shot', 'tube']``, and the result is pickled to ``out_file``.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        out_file: path of the pickle file to write.
    """
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        vals = mani.select((Tube.class_idx_pascal, Tube.video_id,
                            Tube.shot_id, Tube.tube_id),
                           distinct=True)
    finally:
        mani.closeSession()

    vals = np.array(vals)
    counts = getClassCountsByIdType(vals[:, 0], vals[:, 1:],
                                    ['video', 'shot', 'tube'])
    # Close the file deterministically so the pickle is fully flushed.
    with open(out_file, 'wb') as f:
        pickle.dump(counts, f)
def script_verifyRecordedScoreMatchesDBScore(params):
    """Recompute a patch score from the DB and assert it equals the stored one.

    For every (hash_table, hash_val) bin of the patch at ``params.img_path``
    the per-bin class counts are read from
    ``<path_to_hash>/<hash_table>_<hash_val>_counts.p``. The contribution of
    the patch's own shot is subtracted from both the bin count and the class
    total, and the mean of the resulting ratios is compared against
    ``params.score_file``.

    The shot's hash rows are fetched through ``mani.selectMix`` with the
    TubeHash columns. The previous code referenced an undefined
    ``mani_hash`` and selected ``Tube.idx``, which could never match the
    (hash_table, hash_val) pairs being counted.

    Args:
        params: object exposing ``path_to_db``, ``path_to_hash``,
            ``total_class_counts``, ``img_path``, ``video_id``, ``shot_id``,
            ``class_idx`` and ``score_file``.

    Raises:
        AssertionError: if the recomputed score is not close to
            ``params.score_file``.
    """
    path_to_db = params.path_to_db
    path_to_hash = params.path_to_hash
    total_class_counts = params.total_class_counts
    img_path = params.img_path
    video_id = params.video_id
    shot_id = params.shot_id
    class_idx = params.class_idx
    score_file = params.score_file

    shot_criterion = (Tube.class_idx_pascal == class_idx,
                      Tube.video_id == video_id,
                      Tube.shot_id == shot_id)

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        total_shot_patches = mani.count((Tube.idx, ), shot_criterion)
        # Hash bins of every patch in the shot.
        hash_info_all = mani.selectMix(
            (TubeHash.hash_table, TubeHash.hash_val), shot_criterion)
    finally:
        mani.closeSession()
    # Plain tuples so that list.count compares by value.
    hash_info_all = [tuple(row) for row in hash_info_all]

    # Hash bins of the patch itself.
    hash_info_patch = getHashInfoForImg(path_to_db, img_path)

    deno = float(total_class_counts[class_idx] - total_shot_patches)
    hash_scores_patch = []
    for hash_info_curr in hash_info_patch:
        hash_info_curr = tuple(hash_info_curr)
        hash_file_curr = os.path.join(
            path_to_hash,
            str(hash_info_curr[0]) + '_' + str(hash_info_curr[1]) +
            '_counts.p')
        with open(hash_file_curr, 'rb') as f:
            hash_bin_class_counts = pickle.load(f)
        numo = (hash_bin_class_counts[class_idx] -
                hash_info_all.count(hash_info_curr))
        hash_scores_patch.append(numo / deno)

    score_db = np.mean(hash_scores_patch)
    print('%s %s %s' % (len(hash_scores_patch), score_db, score_file))
    assert np.isclose(score_db, score_file)
def writeMetaInfoToDb(path_to_db, out_files, idx_global, class_ids_all,
                      path_to_data):
    """Insert one Tube row per patch image listed next to each feature file.

    For every feature file ``X.npz`` in ``out_files`` the sibling text file
    (``.npz`` replaced by ``.txt``) is read; each of its lines is a patch
    image path whose last three components are parsed as
    ``<class id>_<video id>_<shot id>/<tube id>/<frame id>.<ext>``.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        out_files: feature-file paths; each becomes ``deep_features_path``.
        idx_global: index given to the first inserted row; incremented by
            one per row.
        class_ids_all: sequence of class ids; the position of a class id in
            it is stored as ``class_idx_pascal``.
        path_to_data: forwarded to ``getFramePath``.

    Returns:
        The next unused global index (``idx_global`` plus rows inserted).

    Raises:
        AssertionError: if the frame path from ``getFramePath`` does not
            exist on disk.
        ValueError: if a path component cannot be parsed or the class id is
            not in ``class_ids_all``.
    """
    layer = 'fc7'
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        for out_file_idx, out_file in enumerate(out_files):
            if out_file_idx % 100 == 0:
                print('%s %s' % (out_file_idx, len(out_files)))

            in_file_text = out_file.replace('.npz', '.txt')
            patch_files = util.readLinesFromFile(in_file_text)

            for idx_img_file, img_path in enumerate(patch_files):
                segments = [seg for seg in img_path.split('/') if seg != '']
                mat_name, tube_name, frame_name = segments[-3:]

                # <class>_<video>_<shot>: class ends at the first
                # underscore, shot starts after the last one.
                class_id_pascal, _, video_and_shot = mat_name.partition('_')
                video_name, _, shot_name = video_and_shot.rpartition('_')
                video_id = int(video_name)
                shot_id = int(shot_name)
                tube_id = int(tube_name)
                frame_id = int(frame_name.partition('.')[0])

                class_idx_pascal = class_ids_all.index(class_id_pascal)

                # NOTE(review): getFramePath is given frame_id + 1; this
                # presumably bridges 0-based patch ids and 1-based frame
                # files -- confirm.
                frame_path = getFramePath(path_to_data, class_id_pascal,
                                          video_id, shot_id, frame_id + 1)
                assert os.path.exists(frame_path), (
                    'missing frame file: %s' % frame_path)

                mani.insert(idx_global,
                            img_path,
                            frame_id,
                            video_id,
                            tube_id,
                            shot_id,
                            frame_path=frame_path,
                            layer=layer,
                            deep_features_path=out_file,
                            deep_features_idx=idx_img_file,
                            class_id_pascal=class_id_pascal,
                            class_idx_pascal=class_idx_pascal,
                            commit=False)
                idx_global += 1

            # Rows are inserted uncommitted; commit once per feature file.
            mani.session.commit()
    finally:
        mani.closeSession()
    return idx_global
def getShotFrameCount(path_to_db, class_idx, video_id, shot_id):
    """Count the Tube rows belonging to one shot.

    Args:
        path_to_db: either a database URL string, in which case a session is
            opened and closed here, or an already-open manipulator object,
            which is used as-is and left open for the caller.
        class_idx: value matched against ``Tube.class_idx_pascal``.
        video_id: value matched against ``Tube.video_id``.
        shot_id: value matched against ``Tube.shot_id``.

    Returns:
        Whatever ``mani.count`` returns for ``Tube.idx`` under those three
        filters.
    """
    # isinstance also accepts unicode URLs and str subclasses, which the old
    # ``type(x) == str`` test would have treated as a manipulator object.
    owns_session = isinstance(path_to_db, (str, type(u'')))
    if owns_session:
        mani = Tube_Manipulator(path_to_db)
        mani.openSession()
    else:
        mani = path_to_db
    try:
        return mani.count((Tube.idx, ),
                          (Tube.class_idx_pascal == class_idx,
                           Tube.video_id == video_id,
                           Tube.shot_id == shot_id))
    finally:
        # Only close what was opened here.
        if owns_session:
            mani.closeSession()
def saveTotalClassBreakdowns(path_to_db, out_file):
    """Pickle per-class counts of distinct videos, shots and tubes.

    Every distinct ``(class_idx_pascal, video_id, shot_id, tube_id)`` row is
    fetched, handed to ``getClassCountsByIdType`` with the column names
    ``['video', 'shot', 'tube']``, and the result is pickled to ``out_file``.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        out_file: path of the pickle file to write.
    """
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        vals = mani.select((Tube.class_idx_pascal, Tube.video_id,
                            Tube.shot_id, Tube.tube_id),
                           distinct=True)
    finally:
        mani.closeSession()

    vals = np.array(vals)
    counts = getClassCountsByIdType(vals[:, 0], vals[:, 1:],
                                    ['video', 'shot', 'tube'])
    # Close the file deterministically so the pickle is fully flushed.
    with open(out_file, 'wb') as f:
        pickle.dump(counts, f)
def getTubePathsForShot(path_to_db, class_id_pascal, video_id, shot_id,
                        frame_to_choose='middle'):
    """Return the distinct patch image paths of one frame of a shot.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        class_id_pascal: value matched against ``Tube.class_id_pascal``.
        video_id: value matched against ``Tube.video_id``.
        shot_id: value matched against ``Tube.shot_id``.
        frame_to_choose: ``'middle'`` picks the median of the shot's sorted
            distinct frame ids; any other value selects frame id 0.

    Returns:
        A list of ``Tube.img_path`` values for the chosen frame. The list is
        empty when ``'middle'`` is requested for a shot that has no frames
        (this used to raise IndexError).
    """
    shot_criterion = (Tube.class_id_pascal == class_id_pascal,
                      Tube.shot_id == shot_id,
                      Tube.video_id == video_id)
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        frame_rows = mani.select((Tube.frame_id, ), shot_criterion,
                                 distinct=True)
        frame_ids = sorted(row[0] for row in frame_rows)

        if frame_to_choose == 'middle':
            if not frame_ids:
                return []
            # Floor division keeps the index an int on Python 3 as well.
            frame_id = frame_ids[len(frame_ids) // 2]
        else:
            frame_id = 0

        paths = mani.select((Tube.img_path, ),
                            shot_criterion + (Tube.frame_id == frame_id, ),
                            distinct=True)
        return [row[0] for row in paths]
    finally:
        mani.closeSession()
def script_saveBigFeatureMats(params):
    """Stack per-file deep features into batch matrices and save them.

    The list of distinct ``Tube.deep_features_path`` values is cached as a
    pickle at ``out_file_paths`` (created on first use). The sorted list is
    split into ranges by ``util.getIdxRange``; for a range, the features
    returned by ``getGiantFeaturesMatGPU`` are saved to
    ``<out_file_featureMats_pre>_<i>.npz`` and ``[paths, shape_record]`` is
    pickled to ``<out_file_meta_pre>_<i>.p``.

    Args:
        params: object exposing ``out_file_featureMats_pre``,
            ``out_file_meta_pre``, ``path_to_db``, ``out_file_paths`` and
            ``num_batches``.
    """
    import sys  # local: only used for the newline-free progress write

    out_file_featureMats_pre = params.out_file_featureMats_pre
    out_file_meta_pre = params.out_file_meta_pre
    path_to_db = params.path_to_db
    out_file_paths = params.out_file_paths
    num_batches = params.num_batches

    if not os.path.exists(out_file_paths):
        mani = Tube_Manipulator(path_to_db)
        mani.openSession()
        try:
            rows = mani.select((Tube.deep_features_path, ), distinct=True)
        finally:
            mani.closeSession()
        paths_to_features = [row[0] for row in rows]
        random.shuffle(paths_to_features)
        # Close the file before it is read back just below.
        with open(out_file_paths, 'wb') as f:
            pickle.dump(paths_to_features, f)

    with open(out_file_paths, 'rb') as f:
        paths_to_features = pickle.load(f)
    paths_to_features.sort()

    # Floor division: identical to the old ``/`` for int num_batches on
    # Python 2 and keeps batch_size an int on Python 3.
    batch_size = len(paths_to_features) // num_batches
    idxRange = util.getIdxRange(len(paths_to_features), batch_size)
    print('%s %s' % (len(idxRange), idxRange[-1]))

    for start_idx in range(len(idxRange) - 1):
        range_start = idxRange[start_idx]
        range_end = idxRange[start_idx + 1]
        out_file_curr = out_file_featureMats_pre + '_' + str(
            start_idx) + '.npz'
        out_file_meta_curr = out_file_meta_pre + '_' + str(start_idx) + '.p'

        # Progress is written without a newline; the elapsed time printed
        # after the batch completes the line.
        sys.stdout.write('%s %s %s %s %s ' %
                         (start_idx, range_start, range_end, out_file_curr,
                          out_file_meta_curr))

        paths_to_features_curr = paths_to_features[range_start:range_end]
        t = time.time()
        train, shape_record = getGiantFeaturesMatGPU(paths_to_features_curr)
        np.savez(out_file_curr, np.array(train))
        with open(out_file_meta_curr, 'wb') as f:
            pickle.dump([paths_to_features_curr, shape_record], f)
        print(time.time() - t)

        # NOTE(review): this break stops after the first batch. It looks
        # like a debugging leftover, but is kept to preserve behaviour --
        # confirm before removing.
        break
def verifyTotalClassBreakdowns(path_to_db, out_file, num_classes=10):
    """Check pickled per-class breakdown counts against fresh DB counts.

    For each class index in ``range(num_classes)`` the distinct videos,
    (video, shot) pairs and (video, shot, tube) triples are counted in the
    DB and compared with ``counts['video'|'shot'|'tube'][class_idx]`` loaded
    from ``out_file``.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        out_file: pickle file holding the counts to verify.
        num_classes: number of class indices to check, starting at 0.
            Defaults to 10, the value that used to be hard-coded.

    Raises:
        AssertionError: on the first stored count that differs from the DB.
    """
    with open(out_file, 'rb') as f:
        counts = pickle.load(f)

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        for class_idx in range(num_classes):
            print(class_idx)
            criterion = (Tube.class_idx_pascal == class_idx, )
            count_video = mani.count((Tube.video_id, ), criterion,
                                     distinct=True)
            count_shot = mani.count((Tube.video_id, Tube.shot_id),
                                    criterion,
                                    distinct=True)
            count_tube = mani.count(
                (Tube.video_id, Tube.shot_id, Tube.tube_id),
                criterion,
                distinct=True)

            # One line per class: stored/DB pairs for video, shot, tube.
            print('%s %s %s %s %s %s' %
                  (counts['video'][class_idx], count_video,
                   counts['shot'][class_idx], count_shot,
                   counts['tube'][class_idx], count_tube))

            assert counts['video'][class_idx] == count_video
            assert counts['shot'][class_idx] == count_shot
            assert counts['tube'][class_idx] == count_tube
    finally:
        # Close the session even when a check fails.
        mani.closeSession()
def getTotalCountsPerClass(path_to_db, class_idx_all):
    """Count the distinct Tube rows of each class.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        class_idx_all: iterable of values matched against
            ``Tube.class_idx_pascal``.

    Returns:
        dict mapping each class index to the distinct ``Tube.idx`` count
        reported by ``mani.count``.
    """
    total_counts = {}
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        for class_idx in class_idx_all:
            total_counts[class_idx] = mani.count(
                (Tube.idx, ), (Tube.class_idx_pascal == class_idx, ),
                distinct=True)
    finally:
        mani.closeSession()
    return total_counts
def getHashInfoForImg(path_to_db, img_path):
    """Return the (hash_table, hash_val) rows of the patch at ``img_path``.

    Args:
        path_to_db: database URL handed to both manipulators.
        img_path: value matched against ``Tube.img_path``; it must identify
            exactly one Tube row.

    Returns:
        The result of selecting ``(TubeHash.hash_table, TubeHash.hash_val)``
        for that patch's ``Tube.idx``.

    Raises:
        AssertionError: if ``img_path`` does not match exactly one Tube row.
    """
    # Look up the patch id.
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        patch_rows = mani.select((Tube.idx, ), (Tube.img_path == img_path, ))
    finally:
        # Closed before the assert so a failed check cannot leak the session.
        mani.closeSession()
    assert len(patch_rows) == 1, (
        'expected exactly one Tube row for %r, found %d' %
        (img_path, len(patch_rows)))
    patch_id = patch_rows[0][0]

    # Fetch its hash values.
    mani_hash = TubeHash_Manipulator(path_to_db)
    mani_hash.openSession()
    try:
        return mani_hash.select((TubeHash.hash_table, TubeHash.hash_val),
                                (TubeHash.idx == patch_id, ))
    finally:
        mani_hash.closeSession()
def getScoreForIdx(table_idx,
                   path_to_db,
                   class_idx_pascal=None,
                   npz_path=None,
                   n_jobs=1,
                   total_counts=None):
    """Score every hash bin that the patch ``table_idx`` falls into.

    Args:
        table_idx: value of ``Tube.idx`` identifying the patch.
        path_to_db: database URL handed to the manipulators.
        class_idx_pascal: class (or classes) to score against; defaults to
            the patch's own class from the DB.
        npz_path: when given, bins are scored with
            ``getScoreForHashValFromNpz`` instead of ``getScoreForHashVal``.
        n_jobs: values above 1 score the bins in a process pool of at most
            ``multiprocessing.cpu_count()`` workers.
        total_counts: forwarded to ``getScoreForHashValFromNpz`` only.

    Returns:
        ``(scores, class_idx_gt, frame_path)`` where ``scores`` has one
        entry per hash row of the patch, ``class_idx_gt`` is the patch's
        class in the DB and ``frame_path`` is its ``Tube.img_path``.

    Raises:
        ValueError: if ``table_idx`` does not match exactly one Tube row
            (the single-row unpacking fails).
    """
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        mani_hash = TubeHash_Manipulator(path_to_db)
        mani_hash.openSession()
        try:
            [(class_idx_gt, video_idx, frame_path)] = mani.select(
                (Tube.class_idx_pascal, Tube.video_id, Tube.img_path),
                (Tube.idx == table_idx, ))
            hash_table_info = mani_hash.select(
                (TubeHash.hash_table, TubeHash.hash_val),
                (TubeHash.idx == table_idx, ))
            print(len(hash_table_info))
        finally:
            mani_hash.closeSession()
    finally:
        mani.closeSession()

    if class_idx_pascal is not None:
        class_idx = class_idx_pascal
    else:
        class_idx = class_idx_gt

    if npz_path is not None:
        worker = getScoreForHashValFromNpz
        args = [(npz_path, hash_table, hash_val, class_idx, video_idx,
                 class_idx_gt, total_counts)
                for hash_table, hash_val in hash_table_info]
    else:
        worker = getScoreForHashVal
        args = [(path_to_db, hash_table, hash_val, class_idx, video_idx,
                 class_idx_gt)
                for hash_table, hash_val in hash_table_info]

    if n_jobs > 1:
        pool = multiprocessing.Pool(min(multiprocessing.cpu_count(), n_jobs))
        try:
            scores = pool.map(worker, args)
        finally:
            # Do not leave worker processes behind.
            pool.close()
            pool.join()
    else:
        scores = [worker(arg) for arg in args]
    return scores, class_idx_gt, frame_path
def script_saveHashAnalysisImages(params):
    """Render hash-bin analysis images and per-class cumulative graphs.

    The (class index, hash value) pairs of one hash table are cached in
    ``<out_file_class_pre>.npz``; the DB is only queried when that cache does
    not exist yet. Bins are ordered by ``getDiscriminativeScore`` and drawn
    twice with ``getHashAnalysisIm`` (plain and coloured by class). Then one
    cumulative-frequency graph is written per entry of ``class_labels_map``
    to ``<out_file_class_pre>_<class name>.png``.

    Args:
        params: object exposing ``path_to_db``, ``class_labels_map``
            (iterable of ``(class name, class index)`` pairs), ``percents``,
            ``out_file_class_pre``, ``out_file_hash_simple``,
            ``out_file_hash_byClass``, ``hashtable``, ``inc`` and ``dtype``.
    """
    path_to_db = params.path_to_db
    class_labels_map = params.class_labels_map
    percents = params.percents
    out_file_class_pre = params.out_file_class_pre
    out_file_hash_simple = params.out_file_hash_simple
    out_file_hash_byClass = params.out_file_hash_byClass
    hashtable = params.hashtable
    inc = params.inc
    dtype = params.dtype

    cache_file = out_file_class_pre + '.npz'
    if not os.path.exists(cache_file):
        mani = Tube_Manipulator(path_to_db)
        mani.openSession()
        try:
            ids = mani.selectMix((Tube.class_idx_pascal, TubeHash.hash_val),
                                 (TubeHash.hash_table == hashtable, ))
        finally:
            mani.closeSession()
        np.savez(out_file_class_pre, np.array(ids, dtype=dtype))

    # Always read back from the cache so both paths see the same data.
    cache = np.load(cache_file)
    try:
        ids = cache['arr_0']
    finally:
        cache.close()

    counts_all, class_ids_breakdown = getClassIdsCount(ids[:, 0], ids[:, 1])
    sort_idx = np.argsort(getDiscriminativeScore(counts_all))
    counts_all = [counts_all[idx] for idx in sort_idx]
    class_ids_breakdown = [class_ids_breakdown[idx] for idx in sort_idx]

    im_simple = getHashAnalysisIm(counts_all, class_ids_breakdown, inc=inc,
                                  colorByClass=False)
    im_byClass = getHashAnalysisIm(counts_all, class_ids_breakdown, inc=inc,
                                   colorByClass=True)
    visualize.saveMatAsImage(im_simple, out_file_hash_simple)
    visualize.saveMatAsImage(im_byClass, out_file_hash_byClass)

    counts_all_ravel = np.array([c for counts in counts_all for c in counts])
    class_ids_breakdown_ravel = np.array(
        [c for class_ids in class_ids_breakdown for c in class_ids])

    for class_name, class_id in class_labels_map:
        frequency = counts_all_ravel[class_ids_breakdown_ravel == class_id]
        out_file = out_file_class_pre + '_' + class_name + '.png'
        title = class_name + ' ' + str(class_id)
        cum_freq, idx_perc = getCumulativeInfo(frequency, percents)
        savePerClassCumulativeGraph(cum_freq / float(cum_freq[-1]), idx_perc,
                                    percents, out_file, title)
def verifyTotalClassBreakdowns(path_to_db, out_file, num_classes=10):
    """Check pickled per-class breakdown counts against fresh DB counts.

    For each class index in ``range(num_classes)`` the distinct videos,
    (video, shot) pairs and (video, shot, tube) triples are counted in the
    DB and compared with ``counts['video'|'shot'|'tube'][class_idx]`` loaded
    from ``out_file``.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        out_file: pickle file holding the counts to verify.
        num_classes: number of class indices to check, starting at 0.
            Defaults to 10, the value that used to be hard-coded.

    Raises:
        AssertionError: on the first stored count that differs from the DB.
    """
    with open(out_file, 'rb') as f:
        counts = pickle.load(f)

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        for class_idx in range(num_classes):
            print(class_idx)
            criterion = (Tube.class_idx_pascal == class_idx, )
            count_video = mani.count((Tube.video_id, ), criterion,
                                     distinct=True)
            count_shot = mani.count((Tube.video_id, Tube.shot_id),
                                    criterion,
                                    distinct=True)
            count_tube = mani.count(
                (Tube.video_id, Tube.shot_id, Tube.tube_id),
                criterion,
                distinct=True)

            # One line per class: stored/DB pairs for video, shot, tube.
            print('%s %s %s %s %s %s' %
                  (counts['video'][class_idx], count_video,
                   counts['shot'][class_idx], count_shot,
                   counts['tube'][class_idx], count_tube))

            assert counts['video'][class_idx] == count_video
            assert counts['shot'][class_idx] == count_shot
            assert counts['tube'][class_idx] == count_tube
    finally:
        # Close the session even when a check fails.
        mani.closeSession()
def main():
    """One-off loader that fills the TubeHash table from ``*_hash.npy`` files.

    Currently disabled: the function returns on its first statement, so
    nothing below it runs. The loader is kept for reference.
    """
    return

    db_url = 'sqlite://///disk2/novemberExperiments/experiments_youtube/patches_nn_hash.db'
    tube_db = Tube_Manipulator(db_url)
    hash_db = TubeHash_Manipulator(db_url)

    tube_db.openSession()
    feature_files = [
        row[0]
        for row in tube_db.select((Tube.deep_features_path, ), distinct=True)
    ]
    print(len(feature_files))

    hash_db.openSession()
    # The first 11 feature files are skipped.
    for file_no, feature_file in enumerate(feature_files[11:]):
        hash_file = feature_file[:-4] + '_hash.npy'
        print(hash_file)
        patch_rows = tube_db.select(
            (Tube.idx, Tube.deep_features_idx),
            (Tube.deep_features_path == feature_file, ))
        hash_vals = np.load(hash_file)
        for idx_foreign, row in patch_rows:
            for hash_table, hash_val in enumerate(hash_vals[row]):
                hash_db.insert(idx=idx_foreign,
                               hash_table=hash_table,
                               hash_val=int(hash_val),
                               commit=False)
        # Inserts are batched; commit once every ten feature files.
        if file_no % 10 == 0:
            hash_db.session.commit()

    hash_db.closeSession()
    tube_db.closeSession()
def getHashInfoForImg(path_to_db, img_path):
    """Return the (hash_table, hash_val) rows of the patch at ``img_path``.

    Args:
        path_to_db: database URL handed to both manipulators.
        img_path: value matched against ``Tube.img_path``; it must identify
            exactly one Tube row.

    Returns:
        The result of selecting ``(TubeHash.hash_table, TubeHash.hash_val)``
        for that patch's ``Tube.idx``.

    Raises:
        AssertionError: if ``img_path`` does not match exactly one Tube row.
    """
    # Look up the patch id.
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        patch_rows = mani.select((Tube.idx, ), (Tube.img_path == img_path, ))
    finally:
        # Closed before the assert so a failed check cannot leak the session.
        mani.closeSession()
    assert len(patch_rows) == 1, (
        'expected exactly one Tube row for %r, found %d' %
        (img_path, len(patch_rows)))
    patch_id = patch_rows[0][0]

    # Fetch its hash values.
    mani_hash = TubeHash_Manipulator(path_to_db)
    mani_hash.openSession()
    try:
        return mani_hash.select((TubeHash.hash_table, TubeHash.hash_val),
                                (TubeHash.idx == patch_id, ))
    finally:
        mani_hash.closeSession()
def getTotalCountsPerClass(path_to_db, class_idx_all):
    """Count the distinct Tube rows of each class.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        class_idx_all: iterable of values matched against
            ``Tube.class_idx_pascal``.

    Returns:
        dict mapping each class index to the distinct ``Tube.idx`` count
        reported by ``mani.count``.
    """
    total_counts = {}
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        for class_idx in class_idx_all:
            total_counts[class_idx] = mani.count(
                (Tube.idx, ), (Tube.class_idx_pascal == class_idx, ),
                distinct=True)
    finally:
        mani.closeSession()
    return total_counts
def getHashBinClassBreakdowns(args):
    """Pickle per-class video/shot/tube counts for a single hash bin.

    Takes one 5-tuple ``(hash_table, hash_val, path_to_db, out_file, idx)``.
    The tuple is unpacked explicitly instead of in the signature, so callers
    still pass exactly one positional tuple.

    The distinct ``(class_idx_pascal, video_id, shot_id, tube_id)`` rows
    that fall in the bin are passed to ``getClassCountsByIdType`` with the
    column names ``['video', 'shot', 'tube']`` and the result is pickled to
    ``out_file``.

    Args:
        args: the 5-tuple described above; ``idx`` is only printed as a
            progress marker.
    """
    hash_table, hash_val, path_to_db, out_file, idx = args
    print(idx)

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        vals = mani.selectMix(
            (Tube.class_idx_pascal, Tube.video_id, Tube.shot_id,
             Tube.tube_id),
            criterion=(TubeHash.hash_table == hash_table,
                       TubeHash.hash_val == hash_val),
            distinct=True)
    finally:
        mani.closeSession()

    vals = np.array(vals)
    counts = getClassCountsByIdType(vals[:, 0], vals[:, 1:],
                                    ['video', 'shot', 'tube'])
    # Close the file deterministically so the pickle is fully flushed.
    with open(out_file, 'wb') as f:
        pickle.dump(counts, f)
def script_saveBigFeatureMats(params):
    """Stack per-file deep features into batch matrices and save them.

    The list of distinct ``Tube.deep_features_path`` values is cached as a
    pickle at ``out_file_paths`` (created on first use). The sorted list is
    split into ranges by ``util.getIdxRange``; for a range, the features
    returned by ``getGiantFeaturesMatGPU`` are saved to
    ``<out_file_featureMats_pre>_<i>.npz`` and ``[paths, shape_record]`` is
    pickled to ``<out_file_meta_pre>_<i>.p``.

    Args:
        params: object exposing ``out_file_featureMats_pre``,
            ``out_file_meta_pre``, ``path_to_db``, ``out_file_paths`` and
            ``num_batches``.
    """
    import sys  # local: only used for the newline-free progress write

    out_file_featureMats_pre = params.out_file_featureMats_pre
    out_file_meta_pre = params.out_file_meta_pre
    path_to_db = params.path_to_db
    out_file_paths = params.out_file_paths
    num_batches = params.num_batches

    if not os.path.exists(out_file_paths):
        mani = Tube_Manipulator(path_to_db)
        mani.openSession()
        try:
            rows = mani.select((Tube.deep_features_path, ), distinct=True)
        finally:
            mani.closeSession()
        paths_to_features = [row[0] for row in rows]
        random.shuffle(paths_to_features)
        # Close the file before it is read back just below.
        with open(out_file_paths, 'wb') as f:
            pickle.dump(paths_to_features, f)

    with open(out_file_paths, 'rb') as f:
        paths_to_features = pickle.load(f)
    paths_to_features.sort()

    # Floor division: identical to the old ``/`` for int num_batches on
    # Python 2 and keeps batch_size an int on Python 3.
    batch_size = len(paths_to_features) // num_batches
    idxRange = util.getIdxRange(len(paths_to_features), batch_size)
    print('%s %s' % (len(idxRange), idxRange[-1]))

    for start_idx in range(len(idxRange) - 1):
        range_start = idxRange[start_idx]
        range_end = idxRange[start_idx + 1]
        out_file_curr = out_file_featureMats_pre + '_' + str(
            start_idx) + '.npz'
        out_file_meta_curr = out_file_meta_pre + '_' + str(start_idx) + '.p'

        # Progress is written without a newline; the elapsed time printed
        # after the batch completes the line.
        sys.stdout.write('%s %s %s %s %s ' %
                         (start_idx, range_start, range_end, out_file_curr,
                          out_file_meta_curr))

        paths_to_features_curr = paths_to_features[range_start:range_end]
        t = time.time()
        train, shape_record = getGiantFeaturesMatGPU(paths_to_features_curr)
        np.savez(out_file_curr, np.array(train))
        with open(out_file_meta_curr, 'wb') as f:
            pickle.dump([paths_to_features_curr, shape_record], f)
        print(time.time() - t)

        # NOTE(review): this break stops after the first batch. It looks
        # like a debugging leftover, but is kept to preserve behaviour --
        # confirm before removing.
        break
def getHashBinClassBreakdowns(args):
    """Pickle per-class video/shot/tube counts for a single hash bin.

    Takes one 5-tuple ``(hash_table, hash_val, path_to_db, out_file, idx)``.
    The tuple is unpacked explicitly instead of in the signature, so callers
    still pass exactly one positional tuple.

    The distinct ``(class_idx_pascal, video_id, shot_id, tube_id)`` rows
    that fall in the bin are passed to ``getClassCountsByIdType`` with the
    column names ``['video', 'shot', 'tube']`` and the result is pickled to
    ``out_file``.

    Args:
        args: the 5-tuple described above; ``idx`` is only printed as a
            progress marker.
    """
    hash_table, hash_val, path_to_db, out_file, idx = args
    print(idx)

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        vals = mani.selectMix(
            (Tube.class_idx_pascal, Tube.video_id, Tube.shot_id,
             Tube.tube_id),
            criterion=(TubeHash.hash_table == hash_table,
                       TubeHash.hash_val == hash_val),
            distinct=True)
    finally:
        mani.closeSession()

    vals = np.array(vals)
    counts = getClassCountsByIdType(vals[:, 0], vals[:, 1:],
                                    ['video', 'shot', 'tube'])
    # Close the file deterministically so the pickle is fully flushed.
    with open(out_file, 'wb') as f:
        pickle.dump(counts, f)
def script_saveNpzScorePerShot_normalized(params):
    """Save class-normalised hash scores for every tube of one shot.

    For each patch of the shot and each of its hash bins, the bin's
    per-class counts (a row of the matrix loaded from ``file_binCounts``)
    are divided by the per-class totals. The score is the share of
    ``class_idx_assume`` in that normalised row.

    Three defects of the previous version are fixed here:
      * it sliced the raw query result with ``vals[:, 2:]`` before the
        result had been converted to an array, and then replaced the loaded
        ``hash_counts`` matrix with a ``Counter`` dict that the scoring loop
        could not index; both statements are gone;
      * rows of a tube's score matrix are filled by position in the sorted
        unique ``deep_features_idx`` values instead of by the raw index
        value, which only worked when the values were exactly ``0..n-1``;
      * the unused ``getShotFrameCount`` query is no longer issued.

    Args:
        params: dict with keys ``path_to_db``, ``file_binCounts``,
            ``class_idx``, ``video_id``, ``shot_id``, ``out_file_scores``,
            ``num_hash_tables``, ``total_class_counts`` (array-like with an
            ``astype`` method), ``idx`` (printed as a progress marker) and
            optionally ``class_idx_assume`` (defaults to ``class_idx``).

    Output:
        ``out_file_scores`` receives a pickled dict mapping tube id to an
        array with one row per distinct ``deep_features_idx`` of that tube
        (ascending) and ``num_hash_tables`` columns.

    Raises:
        AssertionError: if a patch does not have exactly
            ``num_hash_tables`` hash rows, or a score matrix contains NaN.
        KeyError: if a hash bin of the shot is missing from the bin keys
            stored in ``file_binCounts``.
    """
    path_to_db = params['path_to_db']
    file_binCounts = params['file_binCounts']
    class_idx = params['class_idx']
    video_id = params['video_id']
    shot_id = params['shot_id']
    out_file_scores = params['out_file_scores']
    num_hash_tables = params['num_hash_tables']
    total_counts = params['total_class_counts']
    class_idx_assume = params.get('class_idx_assume', None)
    if class_idx_assume is None:
        class_idx_assume = class_idx
    print(params['idx'])

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        vals = mani.selectMix(
            (Tube.tube_id, Tube.deep_features_idx, TubeHash.hash_table,
             TubeHash.hash_val),
            (Tube.video_id == video_id, Tube.class_idx_pascal == class_idx,
             Tube.shot_id == shot_id))
    finally:
        mani.closeSession()
    vals = np.array(vals)

    with open(file_binCounts, 'rb') as f:
        hash_count_keys, hash_counts = pickle.load(f)

    # Map each bin to its row once instead of a linear search per lookup.
    # setdefault keeps the first occurrence, matching list.index.
    row_of_bin = {}
    for row_no, key in enumerate(hash_count_keys):
        row_of_bin.setdefault(tuple(key), row_no)

    total_counts_float = total_counts.astype(dtype=float)

    scores_all = {}
    for tube_id in np.unique(vals[:, 0]):
        vals_rel = vals[vals[:, 0] == tube_id, 1:]
        deep_features_idx_uni = np.unique(vals_rel[:, 0])
        scores_tube = np.empty((len(deep_features_idx_uni), num_hash_tables))
        scores_tube[:] = np.nan

        for row_no, deep_features_idx in enumerate(deep_features_idx_uni):
            hash_info = vals_rel[vals_rel[:, 0] == deep_features_idx, 1:]
            assert len(hash_info) == num_hash_tables
            scores = []
            for hash_table, hash_val in hash_info:
                bin_row = row_of_bin[(int(hash_table), int(hash_val))]
                normalized = hash_counts[bin_row, :] / total_counts_float
                scores.append(normalized[class_idx_assume] /
                              float(sum(normalized)))
            scores_tube[row_no, :] = scores

        scores_all[tube_id] = scores_tube

    # Every cell must have been filled.
    for tube_id in scores_all:
        assert np.sum(np.isnan(scores_all[tube_id])) == 0

    with open(out_file_scores, 'wb') as f:
        pickle.dump(scores_all, f)
def getInfoForExtractionForTube(path_to_db, pascal_id, video_id, shot_id,
                                tube_id):
    """Fetch the distinct feature-extraction rows of a single tube.

    Args:
        path_to_db: database URL handed to ``Tube_Manipulator``.
        pascal_id: value matched against ``Tube.class_id_pascal``.
        video_id: value matched against ``Tube.video_id``.
        shot_id: value matched against ``Tube.shot_id``.
        tube_id: value matched against ``Tube.tube_id``.

    Returns:
        The distinct ``(img_path, class_id_pascal, deep_features_path,
        deep_features_idx)`` rows matching all four filters.
    """
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        return mani.select(
            (Tube.img_path, Tube.class_id_pascal, Tube.deep_features_path,
             Tube.deep_features_idx),
            (Tube.video_id == video_id, Tube.class_id_pascal == pascal_id,
             Tube.tube_id == tube_id, Tube.shot_id == shot_id),
            distinct=True)
    finally:
        mani.closeSession()
def script_saveNpzScorePerShot(params):
    """Compute per-tube hash-bin scores for one shot and pickle them.

    For every distinct (hash_table, hash_val) pair occurring in the shot,
    the file ``<hash_table>_<hash_val>_counts.p`` under 'path_to_hash' is
    loaded and the count stored for ``class_idx_assume`` is divided by the
    total count of that class. When the assumed class equals the shot's own
    class, the shot's own contribution is subtracted from both the bin
    count and the class total, so the shot does not score itself.

    Args:
        params: dict with the keys
            'path_to_db', 'total_class_counts', 'class_idx', 'video_id',
            'shot_id', 'out_file_scores', 'path_to_hash',
            'num_hash_tables', 'idx' and, optionally, 'class_idx_assume'
            (defaults to 'class_idx'). Each per-bin pickle must provide
            ``.get(class, default)``.

    Side effects:
        Prints ``params['idx']`` and writes a pickled dict
        ``{tube_id: getTubeScoresMat(...)}`` to 'out_file_scores'.
        Despite the function name, the output is a pickle, not an .npz.
    """
    path_to_db = params['path_to_db']
    total_class_counts = params['total_class_counts']
    class_idx = params['class_idx']
    video_id = params['video_id']
    shot_id = params['shot_id']
    out_file_scores = params['out_file_scores']
    path_to_hash = params['path_to_hash']
    num_hash_tables = params['num_hash_tables']
    class_idx_assume = params.get('class_idx_assume', None)
    if class_idx_assume is None:
        class_idx_assume = class_idx

    # Parenthesised so the statement parses under Python 2 and 3 alike;
    # the printed text is unchanged.
    print(params['idx'])

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        toSelect = (Tube.deep_features_path, Tube.tube_id,
                    Tube.deep_features_idx, TubeHash.hash_table,
                    TubeHash.hash_val)
        criterion = (Tube.video_id == video_id,
                     Tube.class_idx_pascal == class_idx,
                     Tube.shot_id == shot_id)
        vals = mani.selectMix(toSelect, criterion)
        total_frames = getShotFrameCount(mani, class_idx, video_id, shot_id)
    finally:
        # Release the session even when a query fails.
        mani.closeSession()

    # How often each (hash_table, hash_val) pair occurs inside this shot.
    hash_info = [(row[3], row[4]) for row in vals]
    hash_counts = dict(Counter(hash_info))

    # Leave the shot's own patches out when scoring against its own class.
    exclude_own_shot = class_idx_assume == class_idx
    total_class_count = total_class_counts[class_idx_assume]
    if exclude_own_shot:
        total_class_count = total_class_count - total_frames

    hash_bin_scores = {}
    for k in hash_counts:
        in_file = str(k[0]) + '_' + str(k[1]) + '_counts.p'
        # Context manager: one file is opened per bin, so handles must not
        # be left for the garbage collector to close.
        with open(os.path.join(path_to_hash, in_file), 'rb') as f_in:
            class_id_counts = pickle.load(f_in)
        hash_bin_count = class_id_counts.get(class_idx_assume, 0)
        if exclude_own_shot:
            hash_bin_count = hash_bin_count - hash_counts[k]
        hash_bin_scores[k] = hash_bin_count / float(total_class_count)

    vals_org = np.array(vals)
    # Drop the path column; what remains is
    # Tube.tube_id, Tube.deep_features_idx, TubeHash.hash_table, TubeHash.hash_val
    vals = np.array(vals_org[:, 1:], dtype=int)

    tube_ids = np.unique(vals[:, 0])
    tube_scores_all = {}
    for tube_id in tube_ids:
        deep_features_idx = np.unique(vals[vals[:, 0] == tube_id, 1])
        tube_scores_all[tube_id] = getTubeScoresMat(
            tube_id, vals, hash_bin_scores, deep_features_idx,
            num_hash_tables)

    with open(out_file_scores, 'wb') as f_out:
        pickle.dump(tube_scores_all, f_out)
def script_saveNpzScorePerShot_normalized(params):
    """Compute class-normalized hash-bin scores for one shot and pickle them.

    For every (hash_table, hash_val) pair attached to a patch of the shot,
    the per-class bin counts are divided by the per-class totals and the
    entry for ``class_idx_assume`` is divided by the sum over all classes:

        score = (count[c] / total[c]) / sum_k(count[k] / total[k])

    Args:
        params: dict with the keys
            'path_to_db', 'file_binCounts', 'class_idx', 'video_id',
            'shot_id', 'out_file_scores', 'num_hash_tables',
            'total_class_counts', 'idx' and, optionally,
            'class_idx_assume' (defaults to 'class_idx').
            'file_binCounts' is a pickle holding the pair
            ``(hash_count_keys, hash_counts)``; ``hash_count_keys`` is
            searched with ``.index(tuple)`` and ``hash_counts`` is indexed
            as ``[row, :]`` -- presumably a 2-D array with one row per key
            and one column per class (TODO confirm against the writer).
            'total_class_counts' must provide ``.astype``.

    Side effects:
        Prints ``params['idx']`` and writes a pickled dict
        ``{tube_id: scores}`` to 'out_file_scores', where ``scores`` has
        shape (number of distinct deep_features_idx, num_hash_tables).
        Despite the function name, the output is a pickle, not an .npz.

    Raises:
        AssertionError: if a patch does not have exactly 'num_hash_tables'
            hash rows, or if any score is left as NaN.
        ValueError: if a hash pair is missing from ``hash_count_keys``.
    """
    path_to_db = params['path_to_db']
    file_binCounts = params['file_binCounts']
    class_idx = params['class_idx']
    video_id = params['video_id']
    shot_id = params['shot_id']
    out_file_scores = params['out_file_scores']
    num_hash_tables = params['num_hash_tables']
    total_counts = params['total_class_counts']
    class_idx_assume = params.get('class_idx_assume', None)
    if class_idx_assume is None:
        class_idx_assume = class_idx

    # Parenthesised so the statement parses under Python 2 and 3 alike;
    # the printed text is unchanged.
    print(params['idx'])

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        toSelect = (Tube.tube_id, Tube.deep_features_idx,
                    TubeHash.hash_table, TubeHash.hash_val)
        criterion = (Tube.video_id == video_id,
                     Tube.class_idx_pascal == class_idx,
                     Tube.shot_id == shot_id)
        vals = mani.selectMix(toSelect, criterion)
        # NOTE(review): the frame count is not used by the normalized score;
        # the call is kept only so the DB access pattern stays as it was.
        getShotFrameCount(mani, class_idx, video_id, shot_id)
    finally:
        # Release the session even when a query fails.
        mani.closeSession()

    # The loaded counts must NOT be replaced by a per-shot Counter: the
    # scoring loop below indexes them as a 2-D table (hash_counts[row, :]).
    with open(file_binCounts, 'rb') as f_counts:
        hash_count_keys, hash_counts = pickle.load(f_counts)

    scores_all = {}
    # Columns: tube_id, deep_features_idx, hash_table, hash_val.
    vals = np.array(vals)
    tube_ids_uni = np.unique(vals[:, 0])
    total_counts_float = total_counts.astype(dtype=float)  # loop invariant

    for tube_id in tube_ids_uni:
        vals_rel = vals[vals[:, 0] == tube_id, 1:]
        deep_features_idx_uni = np.unique(vals_rel[:, 0])
        # Pre-fill with NaN so rows that never get written are detectable.
        scores_tube = np.empty((len(deep_features_idx_uni), num_hash_tables))
        scores_tube[:] = np.nan

        for deep_features_idx in deep_features_idx_uni:
            hash_info = vals_rel[vals_rel[:, 0] == deep_features_idx, 1:]
            assert len(hash_info) == num_hash_tables
            scores = []
            for hash_info_curr in hash_info:
                idx_curr = hash_count_keys.index(tuple(hash_info_curr))
                counts_curr = hash_counts[idx_curr, :]
                class_fracs = counts_curr / total_counts_float
                numo = class_fracs[class_idx_assume]
                deno = sum(class_fracs)
                scores.append(numo / float(deno))
            # NOTE(review): the row is addressed by the deep_features_idx
            # value itself, so indices must be valid row positions
            # (0 .. n-1) -- confirm against how the DB is populated.
            scores_tube[deep_features_idx, :] = scores

        scores_all[tube_id] = scores_tube

    for tube_id in scores_all:
        tube_scores = scores_all[tube_id]
        assert np.sum(np.isnan(tube_scores)) == 0

    with open(out_file_scores, 'wb') as f_out:
        pickle.dump(scores_all, f_out)
def script_saveNpzScorePerShot(params):
    """Compute per-tube hash-bin scores for one shot and pickle them.

    For every distinct (hash_table, hash_val) pair occurring in the shot,
    the file ``<hash_table>_<hash_val>_counts.p`` under 'path_to_hash' is
    loaded and the count stored for ``class_idx_assume`` is divided by the
    total count of that class. When the assumed class equals the shot's own
    class, the shot's own contribution is subtracted from both the bin
    count and the class total, so the shot does not score itself.

    Args:
        params: dict with the keys
            'path_to_db', 'total_class_counts', 'class_idx', 'video_id',
            'shot_id', 'out_file_scores', 'path_to_hash',
            'num_hash_tables', 'idx' and, optionally, 'class_idx_assume'
            (defaults to 'class_idx'). Each per-bin pickle must provide
            ``.get(class, default)``.

    Side effects:
        Prints ``params['idx']`` and writes a pickled dict
        ``{tube_id: getTubeScoresMat(...)}`` to 'out_file_scores'.
        Despite the function name, the output is a pickle, not an .npz.
    """
    path_to_db = params['path_to_db']
    total_class_counts = params['total_class_counts']
    class_idx = params['class_idx']
    video_id = params['video_id']
    shot_id = params['shot_id']
    out_file_scores = params['out_file_scores']
    path_to_hash = params['path_to_hash']
    num_hash_tables = params['num_hash_tables']
    class_idx_assume = params.get('class_idx_assume', None)
    if class_idx_assume is None:
        class_idx_assume = class_idx

    # Parenthesised so the statement parses under Python 2 and 3 alike;
    # the printed text is unchanged.
    print(params['idx'])

    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    try:
        toSelect = (Tube.deep_features_path, Tube.tube_id,
                    Tube.deep_features_idx, TubeHash.hash_table,
                    TubeHash.hash_val)
        criterion = (Tube.video_id == video_id,
                     Tube.class_idx_pascal == class_idx,
                     Tube.shot_id == shot_id)
        vals = mani.selectMix(toSelect, criterion)
        total_frames = getShotFrameCount(mani, class_idx, video_id, shot_id)
    finally:
        # Release the session even when a query fails.
        mani.closeSession()

    # How often each (hash_table, hash_val) pair occurs inside this shot.
    hash_info = [(row[3], row[4]) for row in vals]
    hash_counts = dict(Counter(hash_info))

    # Leave the shot's own patches out when scoring against its own class.
    exclude_own_shot = class_idx_assume == class_idx
    total_class_count = total_class_counts[class_idx_assume]
    if exclude_own_shot:
        total_class_count = total_class_count - total_frames

    hash_bin_scores = {}
    for k in hash_counts:
        in_file = str(k[0]) + '_' + str(k[1]) + '_counts.p'
        # Context manager: one file is opened per bin, so handles must not
        # be left for the garbage collector to close.
        with open(os.path.join(path_to_hash, in_file), 'rb') as f_in:
            class_id_counts = pickle.load(f_in)
        hash_bin_count = class_id_counts.get(class_idx_assume, 0)
        if exclude_own_shot:
            hash_bin_count = hash_bin_count - hash_counts[k]
        hash_bin_scores[k] = hash_bin_count / float(total_class_count)

    vals_org = np.array(vals)
    # Drop the path column; what remains is
    # Tube.tube_id, Tube.deep_features_idx, TubeHash.hash_table, TubeHash.hash_val
    vals = np.array(vals_org[:, 1:], dtype=int)

    tube_ids = np.unique(vals[:, 0])
    tube_scores_all = {}
    for tube_id in tube_ids:
        deep_features_idx = np.unique(vals[vals[:, 0] == tube_id, 1])
        tube_scores_all[tube_id] = getTubeScoresMat(
            tube_id, vals, hash_bin_scores, deep_features_idx,
            num_hash_tables)

    with open(out_file_scores, 'wb') as f_out:
        pickle.dump(tube_scores_all, f_out)