def process(options, collection, annotationName):
    """Rank vocabulary tags by NGD similarity to each concept.

    For every concept in the annotation set whose result file is missing
    (or when options.overwrite is set), rank the FlickrContextSim
    vocabulary tags by their Normalized Google Distance to the concept
    and write the ranking to
    <rootpath>/<collection>/SimilarityIndex/ngd/<concept>.txt.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    concepts = readConcepts(collection, annotationName, rootpath)
    # Fix: compute resultdir once (it was redundantly recomputed later).
    resultdir = os.path.join(rootpath, collection, "SimilarityIndex", "ngd")
    # Only process concepts without an existing result, unless overwriting.
    todo = [x for x in concepts
            if overwrite or not os.path.exists(os.path.join(resultdir, x + '.txt'))]
    if not todo:
        printStatus(INFO, 'nothing to do')
        return

    fcs = FlickrContextSim(collection, rootpath=rootpath)
    vob = fcs.vob
    printStatus(INFO, 'expanding tags for %s-%s -> %s' % (collection, annotationName, resultdir))

    for concept in todo:
        resultfile = os.path.join(resultdir, concept + '.txt')
        vals = []
        for tag in vob:
            dist = fcs.computeNGD(concept, tag, img=1)
            if dist < 10:  # distance threshold: drop far-away tags
                vals.append((tag, dist))
        vals.sort(key=lambda v: v[1])  # smaller distance first (more similar)
        printStatus(INFO, '%s -> %s' % (concept, ' '.join([x[0] for x in vals[:3]])))
        writeRankingResults(vals, resultfile)
def process(options, collection, annotationName):
    """Expand each concept with NGD-ranked tags and persist the rankings.

    Output files go to <rootpath>/<collection>/SimilarityIndex/ngd/;
    existing files are kept unless options.overwrite is set.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    concepts = readConcepts(collection, annotationName, rootpath)
    # Fix: resultdir was assigned twice with the same value; once suffices.
    resultdir = os.path.join(rootpath, collection, "SimilarityIndex", "ngd")
    todo = [concept for concept in concepts
            if not os.path.exists(os.path.join(resultdir, concept + '.txt')) or overwrite]
    if not todo:
        printStatus(INFO, 'nothing to do')
        return

    fcs = FlickrContextSim(collection, rootpath=rootpath)
    printStatus(INFO, 'expanding tags for %s-%s -> %s' % (collection, annotationName, resultdir))

    for concept in todo:
        # Keep only tags within the NGD threshold, ranked ascending by distance.
        vals = [(tag, dist) for tag in fcs.vob
                for dist in [fcs.computeNGD(concept, tag, img=1)] if dist < 10]
        vals.sort(key=lambda v: v[1])
        printStatus(INFO, '%s -> %s' % (concept, ' '.join([x[0] for x in vals[:3]])))
        writeRankingResults(vals, os.path.join(resultdir, concept + '.txt'))
def submit(searchers, collection, annotationName, rootpath=ROOT_PATH, overwrite=0):
    """For each concept, let every searcher score the collection and persist the ranking."""
    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    for concept in concepts:
        for searcher in searchers:
            resultfile = os.path.join(searcher.getOutputdir(), concept + ".txt")
            if checkToSkip(resultfile, overwrite):
                continue
            ranking = searcher.scoreCollection(concept)
            print ("%s: %s %d -> %s" % (searcher.name, concept, len(ranking), resultfile))
            writeRankingResults(ranking, resultfile)
    printStatus('%s.submit' % os.path.basename(__file__), "done")
def process(options, testCollection, annotationName, tagvotefile):
    """Convert a tag-vote file into per-concept ranking files.

    Each line of tagvotefile is ``imageid tag1 score1 tag2 score2 ...``.
    Scores for the concepts still to do are collected and one ranking file
    (descending score) per concept is written into the result directory.

    Returns 0 when there is nothing to do, otherwise None.
    """
    rootpath = options.rootpath
    tpp = options.tpp
    tagged = options.tagged
    overwrite = options.overwrite

    resultdir = generate_result_dir(options, testCollection, tagvotefile)
    concepts = readConcepts(testCollection, annotationName, rootpath)

    # Skip concepts whose result file already exists (unless overwriting).
    todo = []
    for concept in concepts:
        resfile = os.path.join(resultdir, '%s.txt' % concept)
        if checkToSkip(resfile, overwrite):
            continue
        todo.append(concept)
    if not todo:
        print ('nothing to do')
        return 0

    nr_of_concepts = len(todo)

    # When --tagged is set, each concept is restricted to its labeled image set.
    labeled_set = [None] * nr_of_concepts
    if tagged:
        for i in range(nr_of_concepts):
            labeled_set[i] = set(readLabeledImageSet(testCollection, todo[i], tpp, rootpath))

    concept2index = dict(zip(todo, range(nr_of_concepts)))
    ranklists = [[] for i in range(nr_of_concepts)]

    # Fix: close the tag-vote file deterministically instead of leaking the handle.
    with open(tagvotefile) as reader:
        for line in reader:
            elems = line.strip().split()
            imageid = elems[0]
            del elems[0]
            assert(len(elems) % 2 == 0)
            # Walk (tag, score) pairs and keep those matching a todo concept.
            for i in range(0, len(elems), 2):
                tag = elems[i]
                c = concept2index.get(tag, -1)
                if c >= 0:
                    if tagged and imageid not in labeled_set[c]:
                        continue
                    score = float(elems[i + 1])
                    ranklists[c].append((imageid, score))

    for i in range(nr_of_concepts):
        concept = todo[i]
        resfile = os.path.join(resultdir, '%s.txt' % concept)
        ranklist = sorted(ranklists[i], key=lambda v: v[1], reverse=True)
        print ('%s %d -> %s' % (concept, len(ranklist), resfile))
        writeRankingResults(ranklist, resfile)
def process(options, collection, annotationName, runfile, newRunName):
    """Linearly fuse several ranking runs into a new run.

    runfile lists one "<weight> <run>" pair per line (blank lines and lines
    starting with ``#`` are ignored). For each concept the per-run score
    tables are combined as a weighted sum and the fused ranking is written
    under newRunName.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    dataset = options.testset if options.testset else collection

    concepts = readConcepts(collection, annotationName, rootpath)
    simdir = os.path.join(rootpath, collection, "SimilarityIndex", dataset)

    # Fix: close runfile deterministically; use the idiomatic line.split().
    with open(runfile) as fp:
        data = [x.strip() for x in fp if x.strip() and not x.strip().startswith("#")]
    models = []
    for line in data:
        weight, run = line.split()
        models.append((run, float(weight), 1))

    for concept in concepts:
        resultfile = os.path.join(simdir, newRunName, concept + ".txt")
        if checkToSkip(resultfile, overwrite):
            continue
        # The first listed run defines the image set to fuse over.
        scorefile = os.path.join(simdir, models[0][0], concept + ".txt")
        if not os.path.exists(scorefile):
            print("%s does not exist. skip" % scorefile)
            continue
        ranklist = readRankingResults(scorefile)
        names = sorted([x[0] for x in ranklist])
        nr_of_images = len(names)
        name2index = dict(zip(names, range(nr_of_images)))
        print("%s %d" % (concept, nr_of_images))

        scoreTable = readImageScoreTable(concept, name2index, simdir, models,
                                         torank=options.torank)
        assert scoreTable.shape[1] == nr_of_images

        # Weighted sum of per-run score rows -> one fused score per image.
        # NOTE(review): np.matrix is deprecated in NumPy; kept for identical
        # semantics, but consider weights @ scoreTable in a future cleanup.
        weights = [model[1] for model in models]
        scores = np.matrix(weights) * scoreTable
        scores = [float(scores[0, k]) for k in range(nr_of_images)]
        newranklist = [(names[i], scores[i]) for i in range(nr_of_images)]
        # Sort by score, then by name, both descending, for a deterministic order.
        newranklist.sort(key=lambda v: (v[1], v[0]), reverse=True)
        writeRankingResults(newranklist, resultfile)
def process(options, collection, annotationName, runfile, newRunName):
    """Fuse weighted ranking runs (read from runfile) into run newRunName.

    Per concept: read the image set from the first run's score file, load the
    per-run score table, take the weighted sum, and write the fused ranking.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    dataset = options.testset if options.testset else collection

    concepts = readConcepts(collection, annotationName, rootpath)
    simdir = os.path.join(rootpath, collection, "SimilarityIndex", dataset)

    # Fix: runfile handle was leaked; also replace str.split(line) with line.split().
    models = []
    with open(runfile) as fp:
        for raw in fp:
            stripped = raw.strip()
            if not stripped or stripped.startswith("#"):
                continue
            weight, run = stripped.split()
            models.append((run, float(weight), 1))

    for concept in concepts:
        resultfile = os.path.join(simdir, newRunName, concept + ".txt")
        if checkToSkip(resultfile, overwrite):
            continue
        scorefile = os.path.join(simdir, models[0][0], concept + ".txt")
        if not os.path.exists(scorefile):
            print ("%s does not exist. skip" % scorefile)
            continue

        # The first run fixes the image universe and its index order.
        ranklist = readRankingResults(scorefile)
        names = sorted([x[0] for x in ranklist])
        nr_of_images = len(names)
        name2index = dict(zip(names, range(nr_of_images)))
        print ('%s %d' % (concept, nr_of_images))

        scoreTable = readImageScoreTable(concept, name2index, simdir, models,
                                         torank=options.torank)
        assert(scoreTable.shape[1] == nr_of_images)

        # Fused score = weights . scoreTable (kept on np.matrix for identical output).
        weights = [model[1] for model in models]
        scores = np.matrix(weights) * scoreTable
        scores = [float(scores[0, k]) for k in range(nr_of_images)]
        newranklist = [(names[i], scores[i]) for i in range(nr_of_images)]
        newranklist.sort(key=lambda v: (v[1], v[0]), reverse=True)
        writeRankingResults(newranklist, resultfile)
def submit(searchers, collection, annotationName, rootpath=ROOT_PATH, overwrite=0):
    """Score every (concept, searcher) pair and write one ranking file per pair."""
    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    for concept in concepts:
        for searcher in searchers:
            outfile = os.path.join(searcher.getOutputdir(), concept + ".txt")
            # Guard clause: only score when the result is missing or overwriting.
            if not checkToSkip(outfile, overwrite):
                results = searcher.scoreCollection(concept)
                print("%s: %s %d -> %s" % (searcher.name, concept, len(results), outfile))
                writeRankingResults(results, outfile)
    printStatus('%s.submit' % os.path.basename(__file__), "done")
def process(options, collection, annotationName, runfile):
    """Evaluate pickled score runs with AP on the full and 'tagged,lemm' image sets.

    runfile lists one pickle file per line; each pickle holds 'scores'
    (images x concepts) and 'id_images'. For every run and concept, images
    are ranked by score, restricted to annotated images, AP is computed, and
    the ranking is written out. A second pass restricts the ranking to the
    'tagged,lemm' hit set (AP2). Both AP tables are printed.

    Fixes applied: Python-2-only print statements replaced with print()
    calls on single pre-formatted strings (identical output on Py2/Py3),
    lazy map/zip results materialized so sorting works on Py3, file handles
    closed with `with`, and the bare `except:` narrowed to Exception.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    resultdir = os.path.join(rootpath, collection, 'SimilarityIndex', collection)
    apscorer = getScorer('AP')
    with open(runfile) as fp:
        datafiles = [x.strip() for x in fp
                     if x.strip() and not x.strip().startswith('#')]
    nr_of_runs = len(datafiles)

    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    nr_of_concepts = len(concepts)

    printStatus(INFO, 'read annotations from files')
    name2label = [{} for i in range(nr_of_concepts)]
    hit_imgset = [[] for i in range(nr_of_concepts)]
    for i in range(nr_of_concepts):
        names, labels = readAnnotationsFrom(collection, annotationName, concepts[i],
                                            skip_0=False, rootpath=rootpath)
        # list() so ids are reusable under Python 3, where map() is lazy.
        names = list(map(int, names))
        name2label[i] = dict(zip(names, labels))

        label_file = os.path.join(rootpath, collection, 'tagged,lemm',
                                  '%s.txt' % concepts[i])
        try:
            with open(label_file) as lf:
                hit_imgset[i] = set(map(int, lf.readlines()))
        except Exception:
            # Missing/unreadable hit file -> empty tagged set for this concept.
            hit_imgset[i] = set()
        printStatus(INFO, 'readLabeledImageSet for %s-%s -> %d hits'
                    % (collection, concepts[i], len(hit_imgset[i])))

    ap_table = np.zeros((nr_of_runs, nr_of_concepts))
    ap2_table = np.zeros((nr_of_runs, nr_of_concepts))

    for run_idx in range(nr_of_runs):
        runName = os.path.splitext(os.path.basename(datafiles[run_idx]))[0]
        with open(datafiles[run_idx], 'rb') as pf:
            data = pickle.load(pf)
        scores = data['scores']
        assert scores.shape[1] == nr_of_concepts
        imset = data['id_images']
        nr_of_images = len(imset)

        for c_idx in range(nr_of_concepts):
            ground_truth = name2label[c_idx]
            # sorted(...) instead of zip(...).sort() so this works on Py3 (zip is lazy).
            ranklist = sorted(zip(imset, scores[:, c_idx]),
                              key=lambda v: (v[1], str(v[0])), reverse=True)
            # Evaluate only on images that carry an annotation.
            ranklist = [x for x in ranklist if x[0] in ground_truth]

            resfile = os.path.join(resultdir, runName, '%s.txt' % concepts[c_idx])
            if not checkToSkip(resfile, overwrite):
                writeRankingResults(ranklist, resfile)
            sorted_labels = [ground_truth[x[0]] for x in ranklist]
            assert len(sorted_labels) > 0
            ap_table[run_idx, c_idx] = apscorer.score(sorted_labels)

            # Second pass: restrict to the 'tagged,lemm' hit set.
            resfile = os.path.join(resultdir, 'tagged,lemm', runName,
                                   '%s.txt' % concepts[c_idx])
            if not checkToSkip(resfile, overwrite):
                writeRankingResults(
                    [x for x in ranklist if x[0] in hit_imgset[c_idx]], resfile)
            sorted_labels = [ground_truth[x[0]] for x in ranklist
                             if x[0] in hit_imgset[c_idx]]
            ap2_table[run_idx, c_idx] = apscorer.score(sorted_labels)

    print('#' * 100)
    print('# untagged-concept %s' % ' '.join([os.path.basename(x) for x in datafiles]))
    print('#' * 100)
    for c_idx in range(nr_of_concepts):
        print('%s %s' % (concepts[c_idx],
                         ' '.join(['%.3f' % x for x in ap_table[:, c_idx]])))
    print('meanAP %s' % ' '.join(['%.3f' % x for x in ap_table.mean(axis=1)]))

    print('#' * 100)
    print('# tagged-concept')
    print('#' * 100)
    for c_idx in range(nr_of_concepts):
        print('%s %s' % (concepts[c_idx],
                         ' '.join(['%.3f' % x for x in ap2_table[:, c_idx]])))
    print('meanAP2 %s' % ' '.join(['%.3f' % x for x in ap2_table.mean(axis=1)]))
def process(options, collection, annotationName, runfile):
    """AP-evaluate pickled score matrices, overall and on the tagged subset.

    Each file named in runfile is a pickle with 'scores' (images x concepts)
    and 'id_images'. Per run and concept: rank by score, keep annotated
    images, write the ranking, score AP; then repeat on the 'tagged,lemm'
    subset for AP2. Prints both tables.

    Fixes: Python-3-incompatible print statements, zip(...).sort() (fails on
    Py3 iterators), lazy map, leaked file handles, and a bare except clause.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    resultdir = os.path.join(rootpath, collection, 'SimilarityIndex', collection)
    apscorer = getScorer('AP')
    with open(runfile) as runs:
        datafiles = [line.strip() for line in runs
                     if line.strip() and not line.strip().startswith('#')]
    nr_of_runs = len(datafiles)

    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    nr_of_concepts = len(concepts)

    printStatus(INFO, 'read annotations from files')
    name2label = [{} for i in range(nr_of_concepts)]
    hit_imgset = [[] for i in range(nr_of_concepts)]
    for i, concept in enumerate(concepts):
        names, labels = readAnnotationsFrom(collection, annotationName, concept,
                                            skip_0=False, rootpath=rootpath)
        # Materialize: Py3 map() is a one-shot iterator.
        names = list(map(int, names))
        name2label[i] = dict(zip(names, labels))

        label_file = os.path.join(rootpath, collection, 'tagged,lemm',
                                  '%s.txt' % concept)
        try:
            with open(label_file) as lf:
                hit_imgset[i] = set(map(int, lf.readlines()))
        except Exception:
            # Best-effort: no hit file means an empty tagged set.
            hit_imgset[i] = set()
        printStatus(INFO, 'readLabeledImageSet for %s-%s -> %d hits'
                    % (collection, concept, len(hit_imgset[i])))

    ap_table = np.zeros((nr_of_runs, nr_of_concepts))
    ap2_table = np.zeros((nr_of_runs, nr_of_concepts))

    for run_idx in range(nr_of_runs):
        runName = os.path.splitext(os.path.basename(datafiles[run_idx]))[0]
        with open(datafiles[run_idx], 'rb') as pf:
            data = pickle.load(pf)
        scores = data['scores']
        assert scores.shape[1] == nr_of_concepts
        imset = data['id_images']
        nr_of_images = len(imset)

        for c_idx in range(nr_of_concepts):
            ground_truth = name2label[c_idx]
            # Rank by (score, stringified id) descending; sorted() handles lazy zip.
            ranklist = sorted(zip(imset, scores[:, c_idx]),
                              key=lambda v: (v[1], str(v[0])), reverse=True)
            ranklist = [x for x in ranklist if x[0] in ground_truth]

            resfile = os.path.join(resultdir, runName, '%s.txt' % concepts[c_idx])
            if not checkToSkip(resfile, overwrite):
                writeRankingResults(ranklist, resfile)
            sorted_labels = [ground_truth[x[0]] for x in ranklist]
            assert len(sorted_labels) > 0
            ap_table[run_idx, c_idx] = apscorer.score(sorted_labels)

            resfile = os.path.join(resultdir, 'tagged,lemm', runName,
                                   '%s.txt' % concepts[c_idx])
            if not checkToSkip(resfile, overwrite):
                writeRankingResults([x for x in ranklist
                                     if x[0] in hit_imgset[c_idx]], resfile)
            sorted_labels = [ground_truth[x[0]] for x in ranklist
                             if x[0] in hit_imgset[c_idx]]
            ap2_table[run_idx, c_idx] = apscorer.score(sorted_labels)

    # Pre-formatted single strings keep output identical across Py2/Py3.
    print('#' * 100)
    print('# untagged-concept %s' % ' '.join([os.path.basename(x) for x in datafiles]))
    print('#' * 100)
    for c_idx in range(nr_of_concepts):
        print('%s %s' % (concepts[c_idx],
                         ' '.join(['%.3f' % x for x in ap_table[:, c_idx]])))
    print('meanAP %s' % ' '.join(['%.3f' % x for x in ap_table.mean(axis=1)]))

    print('#' * 100)
    print('# tagged-concept')
    print('#' * 100)
    for c_idx in range(nr_of_concepts):
        print('%s %s' % (concepts[c_idx],
                         ' '.join(['%.3f' % x for x in ap2_table[:, c_idx]])))
    print('meanAP2 %s' % ' '.join(['%.3f' % x for x in ap2_table.mean(axis=1)]))
# NOTE(review): fragment of a larger script/function — it relies on names bound
# earlier (tag, y, vectors, feat_dim, rootpath, trainCollection,
# trainAnnotationName, feature, test_imset, test_feat_file, testCollection).
# Python 2 only (print statements).

print 'training %s' % tag

# Train a linear SVM with liblinear and convert it to the fastlinear format.
from model_based.svms.fastlinear.liblinear193.python.liblinearutil import train
from model_based.svms.fastlinear.fastlinear import liblinear_to_fastlinear
svm_params = '-s 2 -B -1 -q'
model = train(y, vectors, svm_params)
fastmodel = liblinear_to_fastlinear([model], [1.0], feat_dim)

# optionally save the learned model to disk
from model_based.svms.fastlinear.fastlinear import fastlinear_save_model
model_dir = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, 'fastlinear')
model_filename = os.path.join(model_dir, '%s.model' % tag)
from basic.common import makedirsforfile
makedirsforfile(model_filename)
fastlinear_save_model(model_filename, fastmodel)

print 'applying %s' % tag

# Score the whole test collection with the trained model.
from model_based.svms.mlengine_util import classify_large_data
ranklist = classify_large_data(fastmodel, test_imset, test_feat_file)
#predict_scores = [fastmodel.predict(x) for x in test_vectors]
#ranklist = sorted(zip(test_renamed, predict_scores), key=lambda v:(v[1],v[0]), reverse=True)

# Persist the ranking under the SimilarityIndex tree.
from basic.common import writeRankingResults
simdir = os.path.join(rootpath, testCollection, 'SimilarityIndex', testCollection, trainCollection, 'conceptsmm15tut.txt', '%s,fastlinear' % feature)
resultfile = os.path.join(simdir, '%s.txt' % tag)
writeRankingResults(ranklist, resultfile)
def process(options, trainCollection, testCollection, feature):
    """For each test image, find its k nearest training images and save the ranking.

    Neighbors are searched in the trainCollection feature space (distance given
    by options.distance); with options.uu set, at most one neighbor per Flickr
    user is kept. Supports splitting the workload across numjobs/job. Results go
    to <rootpath>/<testCollection>/SimilarityIndex/<testset>/<trainCollection>/
    <feature>,<searchMethod>,<k>/<id-suffix>/<imageid>.txt.
    """
    rootpath = options.rootpath
    k = options.k
    distance = options.distance
    blocksize = options.blocksize
    uniqueUser = options.uu
    numjobs = options.numjobs
    job = options.job
    overwrite = options.overwrite
    testset = options.testset
    if not testset:
        testset = testCollection

    searchMethod = distance + 'knn'
    if uniqueUser:
        searchMethod += ",uu"

    # Map image id -> owning user, used by the unique-user constraint.
    tagfile = os.path.join(rootpath, trainCollection, 'TextData', 'id.userid.lemmtags.txt')
    im2user = {}
    for line in open(tagfile):
        im,userid,tags = line.split('\t')
        im2user[im] = userid

    resultdir = os.path.join(rootpath, testCollection, "SimilarityIndex", testset, trainCollection, "%s,%s,%d" % (feature,searchMethod,k))
    feat_dir = os.path.join(rootpath, trainCollection, 'FeatureData', feature)
    id_file = os.path.join(feat_dir, 'id.txt')
    shape_file = os.path.join(feat_dir, 'shape.txt')
    nr_of_images, feat_dim = map(int, open(shape_file).readline().split())
    # NOTE(review): nr_of_images from shape.txt is immediately overwritten by the
    # count of ids on the first line of id.txt — presumably the authoritative
    # count; confirm shape.txt can disagree.
    nr_of_images = len(open(id_file).readline().strip().split())

    searcher = imagesearch.load_model(os.path.join(feat_dir, 'feature.bin'), feat_dim, nr_of_images, id_file)
    searcher.set_distance(distance)

    # Keep only the slice of the test set assigned to this job (1-based job index).
    workingSet = readImageSet(testCollection, testset, rootpath=rootpath)
    workingSet = [workingSet[i] for i in range(len(workingSet)) if (i%numjobs+1) == job]
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs,job,len(workingSet),resultdir))

    test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData', feature)
    test_feat_file = BigFile(test_feat_dir)

    read_time = 0
    knn_time = 0
    start = 0
    done = 0
    filtered = 0

    # Process the working set in blocks to bound memory use.
    while start < len(workingSet):
        end = min(len(workingSet), start + blocksize)
        printStatus(INFO, 'processing images from %d to %d' % (start, end-1))

        s_time = time.time()
        renamed,vectors = test_feat_file.read(workingSet[start:end])
        read_time += time.time() - s_time
        nr_images = len(renamed)

        s_time = time.time()
        for i in range(nr_images):
            # Results are sharded into subfolders by the last two id characters.
            resultfile = os.path.join(resultdir, renamed[i][-2:], '%s.txt' % renamed[i])
            if checkToSkip(resultfile, overwrite):
                continue
            # Over-fetch (>= 3000 or 3k hits) so filtering still leaves k neighbors.
            knn = searcher.search_knn(vectors[i], max_hits=max(3000,k*3))
            if uniqueUser:
                removed, newknn = unique_user_constraint(knn, im2user, k)
                filtered += removed
                knn = newknn
            else:
                knn = knn[:k]
            assert(len(knn) >= k)
            writeRankingResults(knn, resultfile)
            done += 1
        printStatus(INFO, 'job %d-%d: %d done, filtered neighbors %d' % (numjobs, job, done, filtered))
        start = end

    printStatus(INFO, 'job %d-%d: %d done, filtered neighbors %d' % (numjobs, job, done, filtered))
# NOTE(review): fragment of a larger script/function — it relies on names bound
# earlier (names, name2label, tag, feat_dim, rootpath, trainCollection,
# trainAnnotationName, feature, test_imset, test_feat_file, testCollection,
# train_feat_file). Python 2 only (print statements).

# Read the training feature vectors and build the label vector in the same order.
(renamed, vectors) = train_feat_file.read(names)
y = [name2label[x] for x in renamed]

print 'training %s' % tag

# Train a linear SVM with liblinear and convert it to the fastlinear format.
from model_based.svms.fastlinear.liblinear193.python.liblinearutil import train
from model_based.svms.fastlinear.fastlinear import liblinear_to_fastlinear
svm_params = '-s 2 -B -1 -q'
model = train(y, vectors, svm_params)
fastmodel = liblinear_to_fastlinear([model], [1.0], feat_dim)

# optionally save the learned model to disk
from model_based.svms.fastlinear.fastlinear import fastlinear_save_model
model_dir = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, 'fastlinear')
model_filename = os.path.join(model_dir, '%s.model' % tag)
from basic.common import makedirsforfile
makedirsforfile(model_filename)
fastlinear_save_model(model_filename, fastmodel)

print 'applying %s' % tag

# Score the whole test collection with the trained model.
from model_based.svms.mlengine_util import classify_large_data
ranklist = classify_large_data(fastmodel, test_imset, test_feat_file)
#predict_scores = [fastmodel.predict(x) for x in test_vectors]
#ranklist = sorted(zip(test_renamed, predict_scores), key=lambda v:(v[1],v[0]), reverse=True)

# Persist the ranking under the SimilarityIndex tree.
from basic.common import writeRankingResults
simdir = os.path.join(rootpath, testCollection, 'SimilarityIndex', testCollection, trainCollection, 'conceptsmm15tut.txt', '%s,fastlinear'%feature)
resultfile = os.path.join(simdir, '%s.txt' % tag)
writeRankingResults(ranklist, resultfile)