def evaluateSearchEngines(searchers, collection, annotationName, metric, rootpath=ROOT_PATH):
    """Score each searcher on every concept of the given collection and print per-concept and mean performance."""
    scorer = getScorer(metric)
    concepts = readConcepts(collection, annotationName, rootpath)

    nr_of_runs = len(searchers)
    nr_of_concepts = len(concepts)
    results = np.zeros((nr_of_concepts, nr_of_runs))

    for i in range(nr_of_concepts):
        names, labels = readAnnotationsFrom(collection, annotationName, concepts[i], rootpath)
        name2label = dict(zip(names, labels))

        for j in range(nr_of_runs):
            searchresults = searchers[j].scoreCollection(concepts[i])
            # keep only the labeled images, in the order ranked by the searcher
            sorted_labels = [name2label[name] for (name, score) in searchresults if name in name2label]
            results[i, j] = scorer.score(sorted_labels)

    for i in range(nr_of_concepts):
        print concepts[i], ' '.join([niceNumber(x, 3) for x in results[i, :]])

    mean_perf = results.mean(0)
    print 'mean%s' % metric, ' '.join([niceNumber(x, 3) for x in mean_perf])

    return concepts, results
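# Hypothetical usage sketch (not part of the original code). It assumes a searcher
# object exposing scoreCollection(concept) that returns a ranked list of
# (image_id, score) pairs, and that 'toycollection' / 'concepts.txt' exist under
# rootpath with the usual annotation layout; both names are placeholders, and 'AP'
# is assumed to be a metric name accepted by getScorer.
def demo_evaluate_search_engines(rootpath=ROOT_PATH):
    class ConstantSearcher:
        """Toy searcher that returns the same ranking for every concept."""
        def scoreCollection(self, concept):
            return [('im1', 0.9), ('im2', 0.7), ('im3', 0.1)]

    return evaluateSearchEngines([ConstantSearcher()], 'toycollection', 'concepts.txt', 'AP', rootpath)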
def process(options, testCollection, trainCollection, tagsimMethod):
    """Estimate tag relevance for test images with a tag-similarity method (tagsimMethod) and write id.tagvotes.txt."""
    rootpath = options.rootpath
    overwrite = options.overwrite
    testsetName = options.testset if options.testset else testCollection
    tpp = options.tpp
    numjobs = options.numjobs
    job = options.job
    useWnVob = 1

    outputName = tagsimMethod + '-wn' if useWnVob else tagsimMethod

    if tagsimMethod == 'wns':
        resultfile = os.path.join(rootpath, testCollection, "tagrel", testsetName, outputName, 'id.tagvotes.txt')
    else:
        resultfile = os.path.join(rootpath, testCollection, "tagrel", testsetName, trainCollection, outputName, 'id.tagvotes.txt')

    if numjobs > 1:
        resultfile = resultfile.replace("id.tagvotes.txt", "id.tagvotes.%d.%d.txt" % (numjobs, job))

    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    makedirsforfile(resultfile)

    try:
        # images already processed; the last line of the done file is dropped
        doneset = set([str.split(x)[0] for x in open(options.donefile).readlines()[:-1]])
    except:
        doneset = set()
    printStatus(INFO, "done set: %d" % len(doneset))

    testImageSet = readImageSet(testCollection, testCollection, rootpath)
    testImageSet = [x for x in testImageSet if x not in doneset]
    testImageSet = [testImageSet[i] for i in range(len(testImageSet)) if (i % numjobs + 1) == job]
    printStatus(INFO, 'working on %d-%d, %d test images -> %s' % (numjobs, job, len(testImageSet), resultfile))

    testreader = TagReader(testCollection, rootpath=rootpath)

    if tagsimMethod == "wns":
        tagrel = SIM_TO_TAGREL["wns"](trainCollection, useWnVob, "wup", rootpath)
    else:
        tagrel = SIM_TO_TAGREL[tagsimMethod](trainCollection, useWnVob, rootpath)

    done = 0
    fw = open(resultfile, "w")

    for qry_id in testImageSet:
        qry_tags = testreader.get(qry_id)
        tagvotes = tagrel.estimate(qry_tags)
        newline = qry_id + " " + " ".join(["%s %s" % (tag, niceNumber(vote, 8)) for (tag, vote) in tagvotes])
        fw.write(newline + "\n")
        done += 1
        if done % 1000 == 0:
            printStatus(INFO, "%d done" % done)

    # done
    fw.close()
    printStatus(INFO, "%d done" % done)
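# Hypothetical sketch (not from the original code) of the `options` object that
# the process() function above reads from. The real scripts build it with an
# option parser; the option names follow the attributes accessed above, while the
# defaults (e.g. 'lemm' for tpp) are illustrative assumptions only.
def build_demo_options():
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('--rootpath', default=ROOT_PATH)
    parser.add_option('--overwrite', type='int', default=0)
    parser.add_option('--testset', default=None)
    parser.add_option('--tpp', default='lemm')       # tag preprocessing, assumed default
    parser.add_option('--numjobs', type='int', default=1)
    parser.add_option('--job', type='int', default=1)
    parser.add_option('--donefile', default=None)
    (options, args) = parser.parse_args([])           # parse an empty argv -> defaults only
    return options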
def process(options, testCollection, trainCollection, trainAnnotationName, feature, modelName):
    """Apply a ModelArray of fastlinear classifiers to every streamed test image and write ranked tag votes."""
    assert(modelName.startswith('fastlinear'))

    rootpath = options.rootpath
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job
    topk = options.topk

    outputName = '%s,%s' % (feature, modelName)
    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection, trainCollection, trainAnnotationName, outputName, 'id.tagvotes.txt')
    if numjobs > 1:
        resultfile += '.%d.%d' % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)
    nr_of_concepts = len(concepts)

    test_imset = readImageSet(testCollection, testCollection, rootpath)
    test_imset = [test_imset[i] for i in range(len(test_imset)) if i % numjobs + 1 == job]
    test_imset = set(test_imset)
    nr_of_test_images = len(test_imset)
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, nr_of_test_images, resultfile))

    ma = ModelArray(trainCollection, trainAnnotationName, feature, modelName, rootpath=rootpath)

    feat_file = StreamFile(os.path.join(rootpath, testCollection, "FeatureData", feature))

    makedirsforfile(resultfile)
    fw = open(resultfile, "w")

    done = 0
    feat_file.open()

    for _id, _vec in feat_file:
        if _id not in test_imset:
            continue
        res = ma.predict([_vec], prob=0)
        tagvotes = res[0]
        if topk > 0:
            tagvotes = tagvotes[:topk]
        newline = '%s %s\n' % (_id, " ".join(["%s %s" % (tag, niceNumber(vote, 6)) for (tag, vote) in tagvotes]))
        fw.write(newline)
        done += 1
        if done % 1e4 == 0:
            printStatus(INFO, "%d done" % done)

    feat_file.close()
    fw.close()
    printStatus(INFO, "%d done" % done)
    return done
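# Illustrative helper (not part of the original code): parse one line of the
# id.tagvotes.txt files written above, whose format is
# "<image_id> <tag1> <score1> <tag2> <score2> ...".
def parse_tagvotes_line(line):
    elems = line.strip().split()
    image_id = elems[0]
    # the remaining elements come in (tag, score) pairs
    tagvotes = [(elems[i], float(elems[i + 1])) for i in range(1, len(elems), 2)]
    return image_id, tagvotes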
def process(options, trainCollection, annotationName, testCollection):
    """Tag test images by tag co-occurrence (TagCooccurTagger), or its content-enhanced variant (TagCooccurPlusTagger) when k_c > 0."""
    rootpath = options.rootpath
    m = options.m
    k_r = options.kr
    k_d = options.kd
    k_s = options.ks
    k_c = options.kc
    feature = options.feature
    add_bonus = options.bonus
    overwrite = options.overwrite

    #outputName = 'cotag,m%d,kr%d,kd%d,ks%d,kc%d,bonus%d' % (m, k_r, k_d, k_s, k_c, add_bonus)
    outputName = 'cotag'  # simplify the outputName to reduce the length of the result filename
    outputName = os.path.join(outputName, feature) if (k_c > 1e-6) else outputName

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection, trainCollection, annotationName, outputName, 'id.tagvotes.txt')
    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    testImageSet = readImageSet(testCollection, testCollection, rootpath=rootpath)
    test_tag_reader = TagReader(testCollection, rootpath=rootpath)

    if k_c < 1e-6:
        tagger = TagCooccurTagger(testCollection, trainCollection, annotationName, rootpath=rootpath)
    else:
        tagger = TagCooccurPlusTagger(testCollection, trainCollection, annotationName, feature=feature, rootpath=rootpath)
    tagger.m = m
    tagger.k_r = k_r
    tagger.k_d = k_d
    tagger.k_s = k_s
    tagger.k_c = k_c
    tagger.add_bonus = add_bonus

    makedirsforfile(resultfile)
    fw = open(resultfile, 'w')
    output = []
    done = 0

    for im in testImageSet:
        user_tags = test_tag_reader.get(im)
        tagvotes = tagger.predict(content=im, context=user_tags)
        newline = '%s %s' % (im, ' '.join(['%s %s' % (x[0], niceNumber(x[1], 6)) for x in tagvotes]))
        output.append(newline)
        done += 1
        if len(output) % 1e4 == 0:
            # flush a batch of result lines to disk
            fw.write('\n'.join(output) + '\n')
            output = []
            printStatus(INFO, '%d done' % done)

    if output:
        fw.write('\n'.join(output) + '\n')
    fw.close()
    printStatus(INFO, '%d done' % done)
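# Background sketch (not from the original code): co-occurrence based tagging
# typically scores a candidate tag by how often it co-occurs with the tags a user
# assigned. The asymmetric measure below, roughly P(candidate | user_tag) =
# n(candidate, user_tag) / n(user_tag), is one common formulation and is shown
# only to illustrate the idea; it is not necessarily what TagCooccurTagger implements.
def demo_cooccur_score(candidate, user_tags, pair_count, tag_count):
    """pair_count[(a, b)]: images tagged with both a and b; tag_count[t]: images tagged with t."""
    score = 0.0
    for t in user_tags:
        if tag_count.get(t, 0) > 0:
            score += pair_count.get((candidate, t), 0) / float(tag_count[t])
    return score / max(len(user_tags), 1)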
def process(options, label_file, label2vec_dir, testCollection, feature, new_feature):
    """Map the visual features of a test collection to a label-embedding feature via Image2Vec and write id.feature.txt."""
    rootpath = options.rootpath
    overwrite = options.overwrite
    k = options.k
    blocksize = options.blocksize
    subset = options.subset if options.subset else testCollection

    resfile = os.path.join(rootpath, testCollection, 'FeatureData', new_feature, 'id.feature.txt')
    if checkToSkip(resfile, overwrite):
        return 0

    imsetfile = os.path.join(rootpath, testCollection, 'ImageSets', '%s.txt' % subset)
    imset = map(str.strip, open(imsetfile).readlines())
    printStatus(INFO, '%d images to do' % len(imset))

    feat_file = BigFile(os.path.join(rootpath, testCollection, 'FeatureData', feature))

    im2vec = Image2Vec(label_file, label2vec_dir)

    makedirsforfile(resfile)
    fw = open(resfile, 'w')

    read_time = 0
    run_time = 0
    start = 0
    done = 0

    while start < len(imset):
        end = min(len(imset), start + blocksize)
        printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

        s_time = time.time()
        renamed, test_X = feat_file.read(imset[start:end])
        read_time += time.time() - s_time

        s_time = time.time()
        output = [None] * len(renamed)
        for i in xrange(len(renamed)):
            vec = im2vec.embedding(test_X[i], k)
            output[i] = '%s %s\n' % (renamed[i], " ".join([niceNumber(x, 6) for x in vec]))
        run_time += time.time() - s_time

        start = end
        fw.write(''.join(output))
        done += len(output)

    # done
    printStatus(INFO, "%d done. read time %g seconds, run_time %g seconds" % (done, read_time, run_time))
    fw.close()
    return done
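# Minimal sketch (not from the original code) of the block-wise processing pattern
# used above: walk a list of image ids in chunks of blocksize, so that only one
# block of features has to be held in memory at a time.
def iter_blocks(ids, blocksize):
    start = 0
    while start < len(ids):
        end = min(len(ids), start + blocksize)
        yield ids[start:end]
        start = end

# e.g.  for block in iter_blocks(imset, 1000):
#           renamed, test_X = feat_file.read(block)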
def process(options, testCollection, trainCollection, annotationName, feature):
    """Tag test images with a k-NN based tagger (NAME_TO_TAGGER[taggerType]) in the given feature space and write ranked tag votes."""
    rootpath = options.rootpath
    k = options.k
    distance = options.distance
    blocksize = options.blocksize
    donefile = options.donefile
    numjobs = options.numjobs
    job = options.job
    overwrite = options.overwrite
    taggerType = options.tagger
    noise = options.noise
    testset = options.testset
    if not testset:
        testset = testCollection

    modelName = taggerType
    if 'pretagvote' == taggerType and noise > 1e-3:
        modelName += '-noise%.2f' % noise
    if 'pqtagvote' == taggerType:
        nnName = "l2knn"
    else:
        nnName = distance + "knn"

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testset, trainCollection, annotationName, modelName, '%s,%s,%d' % (feature, nnName, k), 'id.tagvotes.txt')
    if numjobs > 1:
        resultfile += ".%d.%d" % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    if donefile:
        doneset = set([x.split()[0] for x in open(donefile) if x.strip()])
    else:
        doneset = set()
    printStatus(INFO, "%d images have been done already, and they will be ignored" % len(doneset))

    workingSet = readImageSet(testCollection, testset, rootpath)
    workingSet = [x for x in workingSet if x not in doneset]
    workingSet = [workingSet[i] for i in range(len(workingSet)) if (i % numjobs + 1) == job]

    test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData', feature)
    test_feat_file = BigFile(test_feat_dir)

    tagger = NAME_TO_TAGGER[taggerType](trainCollection, annotationName, feature, distance, rootpath=rootpath)
    tagger.k = k
    tagger.noise = noise

    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, len(workingSet), resultfile))

    makedirsforfile(resultfile)
    fw = open(resultfile, "w")

    read_time = 0.0
    test_time = 0.0
    start = 0
    done = 0

    while start < len(workingSet):
        end = min(len(workingSet), start + blocksize)
        printStatus(INFO, 'tagging images from %d to %d' % (start, end - 1))

        s_time = time.time()
        renamed, vectors = test_feat_file.read(workingSet[start:end])
        nr_images = len(renamed)
        read_time += time.time() - s_time

        s_time = time.time()
        output = [None] * nr_images
        for i in range(nr_images):
            tagvotes = tagger.predict(content=vectors[i], context='%s,%s' % (testCollection, renamed[i]))
            output[i] = '%s %s\n' % (renamed[i], " ".join(["%s %s" % (tag, niceNumber(vote, 6)) for (tag, vote) in tagvotes]))
        test_time += time.time() - s_time

        start = end
        fw.write(''.join(output))
        done += len(output)

    fw.close()
    printStatus(INFO, '%d images tagged, read time %g seconds, test time %g seconds' % (done, read_time, test_time))
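# Self-contained illustration (not from the original code) of the job-partition
# rule used above and in the other process() functions: with numjobs parallel
# workers, worker `job` (1-based) handles every item whose index i satisfies
# (i % numjobs) + 1 == job.
def select_for_job(items, numjobs, job):
    return [items[i] for i in range(len(items)) if (i % numjobs + 1) == job]

# e.g. select_for_job(['a', 'b', 'c', 'd', 'e'], 2, 1) -> ['a', 'c', 'e']
#      select_for_job(['a', 'b', 'c', 'd', 'e'], 2, 2) -> ['b', 'd']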
def process(options, testCollection, trainCollection, trainAnnotationName, feature, modelName):
    """Score every streamed test image with one pre-trained classifier per concept (fiksvm or fastlinear) and write the concepts ranked by score."""
    if modelName.startswith('fik'):
        from fiksvm.fiksvm import fiksvm_load_model as load_model
    else:
        from fastlinear.fastlinear import fastlinear_load_model as load_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    prob_output = options.prob_output
    numjobs = options.numjobs
    job = options.job
    #blocksize = options.blocksize
    topk = options.topk

    outputName = '%s,%s' % (feature, modelName)
    if prob_output:
        outputName += ',prob'

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection, trainCollection, trainAnnotationName, outputName, 'id.tagvotes.txt')
    if numjobs > 1:
        resultfile += '.%d.%d' % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)
    nr_of_concepts = len(concepts)

    test_imset = readImageSet(testCollection, testCollection, rootpath)
    test_imset = [test_imset[i] for i in range(len(test_imset)) if i % numjobs + 1 == job]
    test_imset = set(test_imset)
    nr_of_test_images = len(test_imset)
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, nr_of_test_images, resultfile))

    models = [None] * nr_of_concepts
    for c in range(nr_of_concepts):
        model_file_name = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName, '%s.model' % concepts[c])
        models[c] = load_model(model_file_name)
        if models[c] is None:
            return 0
        #(pA,pB) = model.get_probAB()

    feat_file = StreamFile(os.path.join(rootpath, testCollection, "FeatureData", feature))

    makedirsforfile(resultfile)
    fw = open(resultfile, "w")

    done = 0
    feat_file.open()

    for _id, _vec in feat_file:
        if _id not in test_imset:
            continue
        if prob_output:
            scores = [models[c].predict_probability(_vec) for c in range(nr_of_concepts)]
        else:
            scores = [models[c].predict(_vec) for c in range(nr_of_concepts)]
        tagvotes = sorted(zip(concepts, scores), key=lambda v: v[1], reverse=True)
        if topk > 0:
            tagvotes = tagvotes[:topk]
        newline = '%s %s\n' % (_id, " ".join(["%s %s" % (tag, niceNumber(vote, 6)) for (tag, vote) in tagvotes]))
        fw.write(newline)
        done += 1
        if done % 1e4 == 0:
            printStatus(INFO, "%d done" % done)

    feat_file.close()
    fw.close()
    printStatus(INFO, "%d done" % done)
    return done
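# Self-contained illustration (not from the original code) of how per-concept
# scores are turned into the ranked tagvotes list above: zip concepts with their
# scores, sort by score in descending order, then optionally keep the top k.
def demo_rank_concepts():
    concepts = ['dog', 'cat', 'car']
    scores = [0.2, 0.9, 0.5]
    tagvotes = sorted(zip(concepts, scores), key=lambda v: v[1], reverse=True)
    # tagvotes -> [('cat', 0.9), ('car', 0.5), ('dog', 0.2)]
    topk = 2
    return tagvotes[:topk]  # -> [('cat', 0.9), ('car', 0.5)]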
def process(options, testCollection, trainCollection, trainAnnotationName, feature, modelName):
    """Block-wise variant of per-concept classification: read test features with BigFile in chunks of blocksize, score every concept classifier (fiksvm or fastlinear), and write the concepts ranked by score."""
    if modelName.startswith('fik'):
        from fiksvm.fiksvm import fiksvm_load_model as load_model
    else:
        from fastlinear.fastlinear import fastlinear_load_model as load_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    prob_output = options.prob_output
    numjobs = options.numjobs
    job = options.job
    blocksize = options.blocksize

    outputName = '%s,%s' % (feature, modelName)
    if prob_output:
        outputName += ',prob'

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection, trainCollection, trainAnnotationName, outputName, 'id.tagvotes.txt')
    if numjobs > 1:
        resultfile += '.%d.%d' % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)
    nr_of_concepts = len(concepts)

    test_imset = readImageSet(testCollection, testCollection, rootpath)
    test_imset = [test_imset[i] for i in range(len(test_imset)) if i % numjobs + 1 == job]
    nr_of_test_images = len(test_imset)
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, nr_of_test_images, resultfile))

    models = [None] * nr_of_concepts
    for c in range(nr_of_concepts):
        model_file_name = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName, '%s.model' % concepts[c])
        models[c] = load_model(model_file_name)
        if models[c] is None:
            return 0
        #(pA,pB) = model.get_probAB()

    feat_file = BigFile(os.path.join(rootpath, testCollection, "FeatureData", feature))

    makedirsforfile(resultfile)
    fw = open(resultfile, "w")

    read_time = 0
    test_time = 0
    start = 0
    done = 0

    while start < nr_of_test_images:
        end = min(nr_of_test_images, start + blocksize)
        printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

        s_time = time.time()
        renamed, test_X = feat_file.read(test_imset[start:end])
        read_time += time.time() - s_time

        s_time = time.time()
        output = [None] * len(renamed)
        for i in xrange(len(renamed)):
            if prob_output:
                scores = [models[c].predict_probability(test_X[i]) for c in range(nr_of_concepts)]
            else:
                scores = [models[c].predict(test_X[i]) for c in range(nr_of_concepts)]
            #dec_value = sigmoid_predict(dec_value, A=pA, B=pB)
            tagvotes = sorted(zip(concepts, scores), key=lambda v: v[1], reverse=True)
            output[i] = '%s %s\n' % (renamed[i], " ".join(["%s %s" % (tag, niceNumber(vote, 6)) for (tag, vote) in tagvotes]))
        test_time += time.time() - s_time

        start = end
        fw.write(''.join(output))
        fw.flush()
        done += len(output)

    # done
    printStatus(INFO, "%d done. read time %g seconds, test_time %g seconds" % (done, read_time, test_time))
    fw.close()
    return done
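# Background sketch (not from the original code): the commented-out lines above
# refer to Platt scaling, which maps a raw SVM decision value to a probability via
# a fitted sigmoid. A common, numerically stable form of 1 / (1 + exp(A * dec + B))
# is shown below; it only illustrates the formula and is not necessarily identical
# to the library's sigmoid_predict.
def demo_sigmoid_predict(dec_value, A, B):
    import math
    fApB = dec_value * A + B
    if fApB >= 0:
        return math.exp(-fApB) / (1.0 + math.exp(-fApB))
    return 1.0 / (1.0 + math.exp(fApB))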
# Evaluation fragment: assumes a preceding prediction step has produced `renamed`
# (test image ids), `res` (per-image lists of (concept, score) pairs) and a dict
# `ranklist` that accumulates per-concept rankings.
for i in range(len(renamed)):
    test_id = renamed[i]
    for concept, score in res[i]:
        ranklist.setdefault(concept, []).append((test_id, score))

# evaluation
concepts = readConcepts(testCollection, testAnnotationName, rootpath=rootpath)

from basic.metric import getScorer
scorer = getScorer('AP')
mean_perf = 0.0

from basic.annotationtable import readAnnotationsFrom
from basic.common import niceNumber

for concept in concepts:
    names, labels = readAnnotationsFrom(testCollection, testAnnotationName, concept, skip_0=True, rootpath=rootpath)
    name2label = dict(zip(names, labels))
    imagelist = ranklist[concept]
    # sort by score (ties broken by image id), best first
    imagelist.sort(key=lambda v: (v[1], v[0]), reverse=True)
    #print concept, imagelist[:3], imagelist[-3:]
    sorted_labels = [name2label[_id] for _id, _score in imagelist if _id in name2label]
    perf = scorer.score(sorted_labels)
    print concept, niceNumber(perf, 3)
    mean_perf += perf

mean_perf /= len(concepts)
print 'MEAN', niceNumber(mean_perf, 3)
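# Illustration (not from the original code) of what the 'AP' scorer computes for a
# ranked list of binary relevance labels, assuming the scorer implements standard
# average precision; the library's exact implementation may differ in details such
# as how negative labels are encoded.
def demo_average_precision(sorted_labels):
    """sorted_labels: positive for relevant, non-positive otherwise, in ranked order."""
    hits = 0
    precision_sum = 0.0
    for rank, label in enumerate(sorted_labels, start=1):
        if label > 0:
            hits += 1
            precision_sum += float(hits) / rank
    return precision_sum / hits if hits > 0 else 0.0

# e.g. demo_average_precision([1, 0, 1, 0]) -> (1/1 + 2/3) / 2 = 0.833...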