import os
import shutil

import coco_voc
import sg_utils as utils

# Make directories
for i in xrange(60):
    utils.mkdir_if_missing(os.path.join('..', 'data', 'images', '{:02d}'.format(i)))

# Copy files over
sets = ['train', 'val', 'test']
for set_ in sets:
    imdb = coco_voc.coco_voc(set_)
    for i in xrange(imdb.num_images):
        in_file = os.path.join('../data', set_ + '2014',
                               'COCO_{}2014_{:012d}.jpg'.format(set_, imdb.image_index[i]))
        out_file = imdb.image_path_at(i)
        # print in_file, out_file
        shutil.copyfile(in_file, out_file)
        utils.tic_toc_print(1, ' Copying images [{}]: {:06d} / {:06d}\n'.format(
            set_, i, imdb.num_images))
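The 60 two-digit directories exist because images are bucketed by id prefix: an id `ind` lands in directory `'{:02d}'.format(int(ind / 1e4))` (the same scheme the split-file snippet later in this section uses), so 60 buckets cover ids up to 599,999. The helper below is a hypothetical illustration of the mapping that `imdb.image_path_at(i)` presumably implements in coco_voc.py; the function name and the '.jpg' extension are assumptions, not the repo's code.

import os

def image_path_for_id(ind, root=os.path.join('..', 'data', 'images'), ext='.jpg'):
    """Map a COCO image id to its bucketed path, e.g. 581929 -> '58/581929.jpg'."""
    bucket = '{:02d}'.format(int(ind / 1e4))
    return os.path.join(root, bucket, '{:d}{}'.format(ind, ext))

assert image_path_for_id(581929) == os.path.join('..', 'data', 'images', '58', '581929.jpg')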
def mainTest():
    ## DO NOT CHANGE
    numReferencesToEval = 5
    minWords = 3
    precThresh = 0.5
    #####

    testSetName = 'coco'
    testSetSplit = 'valid2'
    imdb = meu.get_imdb(testSetName, testSetSplit)

    has_gpu = False
    if has_gpu:
        gpuId = 1
        caffe.set_mode_gpu()
        caffe.set_device(gpuId)
    else:
        caffe.set_mode_cpu()
        print 'using CPU'

    # List of paths where we keep our caffe models.
    caffeModelPaths = ['./experiments']
    # Output directory to write results; make sure it has >2GB free space.
    detOutPath = './det-output'
    # List of models we want to evaluate; make sure they have an entry in
    # the function modelVocabConfig() in data_model_utils.py.
    solverProtoList = [
        'vgg/mil_finetune_solver.prototxt',
    ]
    # Iterations to evaluate.
    # evalIters = [80000, 160000, 240000, 320000, 400000]
    evalIters = [320000]

    for i in range(len(solverProtoList)):
        solverProtoName = solverProtoList[i]
        vocab = meu.get_model_vocab(solverProtoName)
        infType = meu.get_model_inference_type(solverProtoList[i])
        baseImageSize = meu.get_model_image_size(solverProtoList[i])
        gtKeyedLabel = None

        for caffeModelPath in caffeModelPaths:
            solverProtoPath = os.path.join(caffeModelPath, solverProtoName)
            auxFiles = caffe_utils.get_model_aux_files_from_solver(
                solverProtoPath=solverProtoPath, caffeModelPath=caffeModelPath)
            if auxFiles is None:
                print 'could not find solver in %s' % (solverProtoPath)
                continue
            if len(auxFiles['snapshotFiles']) == 0:
                print 'no snapshots found ', solverProtoPath
                continue

            expSubDirBase = auxFiles['expSubDirBase']
            expName = getExpNameFromSolverProtoName(solverProtoPath)
            expDirBase = os.path.join(expSubDirBase, expName)
            modelIterNums = [caffe_utils.get_iter_from_model_file(snapFilePath)
                             for snapFilePath in auxFiles['snapshotFiles']]
            runInds = im_utils.argsort(modelIterNums, reverse=True)

            for ci, s in enumerate(runInds):
                snapFilePath = auxFiles['snapshotFiles'][s]
                modelIterNumber = caffe_utils.get_iter_from_model_file(snapFilePath)
                if modelIterNumber not in evalIters:
                    continue
                print solverProtoPath, modelIterNumber

                modelOuts = getModelOutputPaths(
                    detOutPath, expDirBase, expName, snapFilePath,
                    testSetName, testSetSplit,
                    numReferencesToEval=numReferencesToEval,
                    minWords=minWords, precThresh=precThresh, ext='.h5')
                detectionFile = modelOuts['detectionFile']
                # Evaluate as in MILVC.
                evalFile = modelOuts['evalFile']
                # Evaluate using the standard definition of AP.
                evalNoRefFile = evalFile.replace('.h5', '_noref.h5')
                # Evaluate using the COCO fully-labeled ground truth.
                evalCocoManualGtFile = evalFile.replace('.h5', '_cocomanualgt.h5')

                bdir = os.path.split(detectionFile)[0]
                sg_utils.mkdir_if_missing(bdir)

                if not lock_utils.is_locked(detectionFile):
                    model = loadModel(auxFiles['deployProtoPath'], snapFilePath,
                                      vocab, baseImageSize, infType)
                    testModelBatch(imdb, model, detectionFile)
                    lock_utils.unlock(detectionFile)
                else:
                    print '%s locked' % (detectionFile)

                model = {}
                model['inf_type'] = infType
                model['vocab'] = vocab
                gtLabel = getLabels(imdb, model, solverProtoName)

                # Evaluate as in MILVC, using the "weighted" version of AP.
                # This requires multiple ground-truth references per image,
                # e.g. in COCO captions we have 5 captions per image, so for
                # each "visual concept" we have 5 ground-truth references.
                if imdb._name == 'coco' and \
                        lock_utils.file_ready_to_read(detectionFile) and \
                        (not lock_utils.is_locked(evalFile)):
                    model = {}
                    model['inf_type'] = infType
                    model['vocab'] = vocab
                    if infType == 'MILNoise':
                        evalModelBatch(imdb, model, gtLabel,
                                       numReferencesToEval, detectionFile,
                                       evalFile,
                                       evalNoiseKey='noisy_comb_noimage')
                    else:
                        evalModelBatch(imdb, model, gtLabel,
                                       numReferencesToEval, detectionFile,
                                       evalFile)
                    lock_utils.unlock(evalFile)

                # Evaluate using the standard AP definition. This does not
                # need multiple references, hence the name "NoRef".
                if imdb._name == 'coco' and \
                        lock_utils.file_ready_to_read(detectionFile) and \
                        (not lock_utils.is_locked(evalNoRefFile)):
                    model = {}
                    model['inf_type'] = infType
                    model['vocab'] = vocab
                    if infType == 'MILNoise':
                        evalModelBatchNoRef(imdb, model, gtLabel,
                                            numReferencesToEval, detectionFile,
                                            evalNoRefFile,
                                            evalNoiseKey='noisy_comb_noimage')
                    else:
                        evalModelBatchNoRef(imdb, model, gtLabel,
                                            numReferencesToEval, detectionFile,
                                            evalNoRefFile)
                    lock_utils.unlock(evalNoRefFile)

                # Evaluate using the fully labeled ground truth for the 80
                # COCO detection classes. We have a manual mapping defined
                # from the 80 COCO classes to the 1000 visual concepts.
                if imdb._name == 'coco' and \
                        lock_utils.file_ready_to_read(detectionFile) and \
                        (not lock_utils.is_locked(evalCocoManualGtFile)):
                    model = {}
                    model['inf_type'] = infType
                    model['vocab'] = vocab
                    cocoFile = './data/coco_instancesGT_eval_%s.h5' % (testSetSplit)
                    dt = sg_utils.load(detectionFile)
                    mil_prob = dt['mil_prob']
                    evalModelBatchOnClassificationCOCOManual(
                        imdb, model, mil_prob, evalCocoManualGtFile, cocoFile)
                    if infType == 'MILNoise':
                        mil_prob = dt['noisy_comb_noimage']
                        evalCocoManualGtNoiseFile = evalCocoManualGtFile.replace(
                            '.h5', '_noise.h5')
                        evalModelBatchOnClassificationCOCOManual(
                            imdb, model, mil_prob, evalCocoManualGtNoiseFile,
                            cocoFile)
                    lock_utils.unlock(evalCocoManualGtFile)

                if imdb.name == 'coco' and lock_utils.file_ready_to_read(evalFile):
                    print '==' * 20
                    print 'AP (as computed in MILVC)'
                    N_WORDS = len(vocab['words'])
                    model = {}
                    model['inf_type'] = infType
                    model['vocab'] = vocab
                    cap_eval_utils.print_benchmark_latex(evalFile, vocab=vocab)
                    evalFile = evalFile.replace('.h5', '_noise.h5')
                    if os.path.isfile(evalFile):
                        print 'noise'
                        cap_eval_utils.print_benchmark_latex(evalFile, vocab=vocab)

                if imdb.name == 'coco' and lock_utils.file_ready_to_read(evalNoRefFile):
                    print '==' * 20
                    print 'AP (as computed in PASCAL VOC)'
                    N_WORDS = len(vocab['words'])
                    model = {}
                    model['inf_type'] = infType
                    model['vocab'] = vocab
                    cap_eval_utils.print_benchmark_latex(evalNoRefFile, vocab=vocab)
                    evalNoRefFile = evalNoRefFile.replace('.h5', '_noise.h5')
                    if os.path.isfile(evalNoRefFile):
                        print 'noise'
                        cap_eval_utils.print_benchmark_latex(evalNoRefFile, vocab=vocab)

                if imdb.name == 'coco' and lock_utils.file_ready_to_read(evalCocoManualGtFile):
                    dt = sg_utils.load(evalCocoManualGtFile)
                    dtMeta = sg_utils.load(
                        evalCocoManualGtFile.replace('.h5', '_meta.pkl'))
                    classesFound = dtMeta['classesFound']
                    srtInds = im_utils.argsort(classesFound)
                    accAP = np.zeros((1), dtype=np.float32)
                    for ind in srtInds:
                        accAP += dt['ap'][ind]
                    print 'evaluate on fully-labeled GT:',
                    print 'AP %.2f; classes %d' % (100 * accAP / len(classesFound),
                                                   len(classesFound))
                    evalCocoManualGtNoiseFile = evalCocoManualGtFile.replace(
                        '.h5', '_noise.h5')
                    if os.path.isfile(evalCocoManualGtNoiseFile):
                        dt = sg_utils.load(evalCocoManualGtNoiseFile)
                        print '--noise--'
                        accAP = np.zeros((1), dtype=np.float32)
                        for ind in srtInds:
                            print '{:.2f} '.format(100 * dt['ap'][ind]),
                            accAP += dt['ap'][ind]
                        print ''
                        print '%.2f; %d' % (100 * accAP / len(classesFound),
                                            len(classesFound))
                        print '--' * 10
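mainTest() thus produces three scores for the same detections: the MILVC "weighted" AP, which needs the 5 reference captions per COCO image, the standard "NoRef" AP, and classification AP against the fully-labeled COCO ground truth. The repo's exact weighted-AP computation lives in evalModelBatch / cap_eval_utils; the sketch below only illustrates the reference-weighting idea, assuming sklearn-style sample weights where a positive's weight is the fraction of the 5 captions mentioning the word. The helper name and the weighting scheme are assumptions, not the repo's code.

import numpy as np

def weighted_average_precision(scores, labels, weights=None):
    """AP for one word across images, with optional per-image sample weights."""
    order = np.argsort(-scores)
    y = labels[order].astype(np.float64)
    w = np.ones_like(y) if weights is None else weights[order].astype(np.float64)
    tp = np.cumsum(w * y)
    if tp[-1] == 0:
        return 0.0  # no positives for this word
    prec = tp / np.cumsum(w)  # weighted precision at each rank
    rec = tp / tp[-1]         # weighted recall at each rank
    drec = np.diff(np.concatenate(([0.], rec)))
    return float(np.sum(prec * drec))

scores = np.array([0.9, 0.8, 0.3, 0.1])  # detector confidences for one word
labels = np.array([1, 0, 1, 0])          # 1 if any reference caption mentions it
# Positives weighted by annotator agreement (5/5 vs 1/5 captions mention the
# word); negatives keep weight 1 so false positives still hurt precision.
agree = np.where(labels > 0, np.array([1.0, 0.0, 0.2, 0.0]), 1.0)
print weighted_average_precision(scores, labels)         # standard ("NoRef") AP
print weighted_average_precision(scores, labels, agree)  # reference-weighted AP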
print 'Writing labels to {}'.format(split_file)
with open(split_file, 'wt') as f:
    for j in xrange(imdb[i].num_images):
        ind = imdb[i].image_index[j]
        ind_str = '{:02d}/{:d}'.format(int(math.floor(ind) / 1e4), ind)
        f.write('{}\n'.format(ind_str))

# Print the command to start training

if args.task == 'test_model':
    imdb = coco_voc.coco_voc(args.test_set)
    mean = np.array([[[103.939, 116.779, 123.68]]])
    base_image_size = 565
    model = load_model(args.prototxt_deploy, args.model, base_image_size, mean, vocab)
    out_dir = args.model + '_output'
    utils.mkdir_if_missing(out_dir)
    detection_file = os.path.join(out_dir, imdb.name + '_detections.pkl')
    test_model(imdb, model, detection_file=detection_file)

if args.task == 'eval_model':
    imdb = coco_voc.coco_voc(args.test_set)
    gt_label = preprocess.get_vocab_counts(imdb.image_index,
                                           imdb.coco_caption_data, 5, vocab)
    out_dir = args.model + '_output'
    detection_file = os.path.join(out_dir, imdb.name + '_detections.pkl')
    eval_file = os.path.join(out_dir, imdb.name + '_eval.pkl')
    benchmark(imdb, vocab, gt_label, 5, detection_file, eval_file=eval_file)

if args.task == 'output_words':
    out_dir = args.model + '_output'
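The dispatch above reads args.task, args.test_set, args.prototxt_deploy, and args.model, but the argparse wiring sits outside this fragment. A minimal parser consistent with those attributes might look like the following; the flag names, defaults, and the 'train_model' choice (implied by the "Print the command to start training" comment) are inferred, not taken from the repo.

import argparse

def parse_args():
    # Hypothetical front end inferred from the args.* attributes used above.
    parser = argparse.ArgumentParser(
        description='Train / test / evaluate MIL visual-concept models')
    parser.add_argument('--task', type=str, required=True,
                        choices=['train_model', 'test_model', 'eval_model',
                                 'output_words'])
    parser.add_argument('--test_set', type=str, default='test')
    parser.add_argument('--prototxt_deploy', type=str, default=None)
    parser.add_argument('--model', type=str, default=None)
    return parser.parse_args()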