Example #1
import os

# readConcepts, printStatus, writeRankingResults, INFO and FlickrContextSim
# are assumed to come from the surrounding project's helper modules; they are
# not defined in this snippet.
def process(options, collection, annotationName):
    rootpath = options.rootpath
    overwrite = options.overwrite

    concepts = readConcepts(collection, annotationName, rootpath)
    resultdir = os.path.join(rootpath, collection, "SimilarityIndex", "ngd")

    todo = [x for x in concepts if not os.path.exists(os.path.join(resultdir,x+'.txt')) or overwrite]
    if not todo:
        printStatus(INFO, 'nothing to do')
        return

    fcs = FlickrContextSim(collection, rootpath=rootpath)
    vob = fcs.vob  # the tag vocabulary of the collection
    printStatus(INFO, 'expanding tags for %s-%s -> %s' % (collection, annotationName, resultdir))
    
    for concept in todo:
        resultfile = os.path.join(resultdir, concept + '.txt')
            
        vals = []
        for tag in vob:
            # Normalized Google Distance between the concept and the candidate tag
            dist = fcs.computeNGD(concept, tag, img=1)
            if dist < 10:
                vals.append((tag, dist))
        vals.sort(key=lambda v: v[1])  # smaller distance = more related
        printStatus(INFO, '%s -> %s' % (concept, ' '.join([x[0] for x in vals[:3]])))
        writeRankingResults(vals, resultfile)
Example #2
def process(options, collection, annotationName):
    rootpath = options.rootpath
    overwrite = options.overwrite

    concepts = readConcepts(collection, annotationName, rootpath)
    resultdir = os.path.join(rootpath, collection, "SimilarityIndex", "ngd")

    todo = [
        x for x in concepts
        if not os.path.exists(os.path.join(resultdir, x + '.txt')) or overwrite
    ]
    if not todo:
        printStatus(INFO, 'nothing to do')
        return

    fcs = FlickrContextSim(collection, rootpath=rootpath)
    vob = fcs.vob
    printStatus(
        INFO, 'expanding tags for %s-%s -> %s' %
        (collection, annotationName, resultdir))

    for concept in todo:
        resultfile = os.path.join(resultdir, concept + '.txt')

        vals = []
        for tag in vob:
            dist = fcs.computeNGD(concept, tag, img=1)
            if dist < 10:
                vals.append((tag, dist))
        vals.sort(key=lambda v: v[1])
        printStatus(INFO,
                    '%s -> %s' % (concept, ' '.join([x[0] for x in vals[:3]])))
        writeRankingResults(vals, resultfile)
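Note: a minimal sketch of the Normalized Google Distance that computeNGD presumably implements, written against hypothetical raw tag-frequency counts (the real FlickrContextSim derives these from Flickr tag co-occurrence statistics):

import math

def ngd(fx, fy, fxy, N):
    """Normalized Google Distance from raw co-occurrence counts.

    fx, fy : number of images tagged with x / with y
    fxy    : number of images tagged with both
    N      : total number of tagged images
    """
    if fxy == 0:
        return float('inf')  # tags never co-occur: maximally distant
    lx, ly, lxy = math.log(fx), math.log(fy), math.log(fxy)
    return (max(lx, ly) - lxy) / (math.log(N) - min(lx, ly))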
Example #3
def submit(searchers, collection, annotationName, rootpath=ROOT_PATH, overwrite=0):
    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    nr_of_runs = len(searchers)

    for concept in concepts:
        for j in range(nr_of_runs):
            resultfile = os.path.join(searchers[j].getOutputdir(), concept + ".txt")
            if checkToSkip(resultfile, overwrite):
                continue
            searchresults = searchers[j].scoreCollection(concept)
            print ("%s: %s %d -> %s" % (searchers[j].name, concept, len(searchresults), resultfile))
            writeRankingResults(searchresults, resultfile)

    printStatus('%s.submit' % os.path.basename(__file__), "done")
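Note: submit() only assumes a small interface on each searcher: a name attribute, getOutputdir(), and scoreCollection(concept) returning a ranked list of (imageid, score) pairs. A hypothetical stand-in makes that contract explicit:

class DummySearcher:
    """Illustrative stand-in for the searcher interface submit() relies on."""

    def __init__(self, name, outputdir):
        self.name = name
        self.outputdir = outputdir

    def getOutputdir(self):
        return self.outputdir

    def scoreCollection(self, concept):
        # a real searcher would rank the whole collection for this concept;
        # here we return a fixed toy ranklist of (imageid, score) pairs
        return [('img001', 0.9), ('img002', 0.4)]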
Example #4
def process(options, testCollection, annotationName, tagvotefile):
    rootpath = options.rootpath
    tpp = options.tpp
    tagged = options.tagged
    overwrite = options.overwrite

    resultdir = generate_result_dir(options, testCollection, tagvotefile)
    
    concepts = readConcepts(testCollection, annotationName, rootpath)
    todo = []
    for concept in concepts:
        resfile = os.path.join(resultdir, '%s.txt' % concept)
        if checkToSkip(resfile, overwrite):
            continue
        todo.append(concept)

    if not todo:
        print('nothing to do')
        return 0

    nr_of_concepts = len(todo)
    labeled_set = [None] * nr_of_concepts
    if tagged:
        for i in range(nr_of_concepts):
            labeled_set[i] = set(readLabeledImageSet(testCollection, todo[i], tpp, rootpath))
        
    concept2index = dict(zip(todo, range(nr_of_concepts)))
    ranklists = [[] for i in range(nr_of_concepts)]

    # each line of the tag-vote file: imageid tag1 score1 tag2 score2 ...
    for line in open(tagvotefile):
        elems = line.strip().split()
        imageid = elems[0]
        del elems[0]
        assert len(elems) % 2 == 0

        for i in range(0, len(elems), 2):
            tag = elems[i]
            c = concept2index.get(tag, -1)
            if c >= 0:
                if tagged and imageid not in labeled_set[c]:
                    continue
                score = float(elems[i+1])
                ranklists[c].append((imageid, score))

    for i in range(nr_of_concepts):
        concept = todo[i]
        resfile = os.path.join(resultdir, '%s.txt' % concept)
        ranklist = sorted(ranklists[i], key=lambda v: v[1], reverse=True)
        print('%s %d -> %s' % (concept, len(ranklist), resfile))
        writeRankingResults(ranklist, resfile)
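Note: a tiny worked example of the tag-vote parsing above, with made-up input lines and two concepts of interest; each line is an image id followed by alternating tag/score pairs, and only votes for tags that are also concepts are kept:

concept2index = {'dog': 0, 'cat': 1}
ranklists = [[], []]
for line in ['im1 dog 0.8 cat 0.2 sun 0.5', 'im2 cat 0.9']:
    elems = line.split()
    imageid, pairs = elems[0], elems[1:]
    for tag, score in zip(pairs[::2], pairs[1::2]):
        c = concept2index.get(tag, -1)
        if c >= 0:
            ranklists[c].append((imageid, float(score)))

print(ranklists)  # [[('im1', 0.8)], [('im1', 0.2), ('im2', 0.9)]]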
Example #5
def process(options, collection, annotationName, runfile, newRunName):
    rootpath = options.rootpath
    overwrite = options.overwrite

    dataset = options.testset if options.testset else collection

    concepts = readConcepts(collection, annotationName, rootpath)
    simdir = os.path.join(rootpath, collection, "SimilarityIndex", dataset)

    data = [x.strip() for x in open(runfile).readlines() if x.strip() and not x.strip().startswith("#")]
    models = []
    for line in data:
        weight, run = line.split()
        models.append((run, float(weight), 1))

    for concept in concepts:
        resultfile = os.path.join(simdir, newRunName, concept + ".txt")
        if checkToSkip(resultfile, overwrite):
            continue

        scorefile = os.path.join(simdir, models[0][0], concept + ".txt")
        if not os.path.exists(scorefile):
            print("%s does not exist. skip" % scorefile)
            continue

        ranklist = readRankingResults(scorefile)
        names = sorted([x[0] for x in ranklist])

        nr_of_images = len(names)
        name2index = dict(zip(names, range(nr_of_images)))

        print("%s %d" % (concept, nr_of_images))

        scoreTable = readImageScoreTable(concept, name2index, simdir, models, torank=options.torank)
        assert scoreTable.shape[1] == nr_of_images

        weights = [model[1] for model in models]

        scores = np.matrix(weights) * scoreTable
        scores = [float(scores[0, k]) for k in range(nr_of_images)]

        newranklist = [(names[i], scores[i]) for i in range(nr_of_images)]
        newranklist.sort(key=lambda v: (v[1], v[0]), reverse=True)

        writeRankingResults(newranklist, resultfile)
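Note: the fusion step above is a plain weighted sum of per-run score tables. A self-contained sketch of the same arithmetic with a made-up 2-runs-by-3-images table (the snippet uses the legacy np.matrix API; @ on plain arrays computes the same thing):

import numpy as np

weights = [0.7, 0.3]                      # one weight per run, as read from runfile
scoreTable = np.array([[0.9, 0.1, 0.5],   # run 1: one score per image
                       [0.2, 0.8, 0.5]])  # run 2
fused = np.asarray(weights) @ scoreTable  # shape: (nr_of_images,)
print(fused)                              # [0.69 0.31 0.5 ]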
Example #6
def process(options, collection, annotationName, runfile, newRunName):
    rootpath = options.rootpath
    overwrite = options.overwrite

    dataset = options.testset if options.testset else collection
    
    concepts = readConcepts(collection, annotationName, rootpath)
    simdir = os.path.join(rootpath, collection, "SimilarityIndex", dataset)

    data = [x.strip() for x in open(runfile).readlines() if x.strip() and not x.strip().startswith("#")]
    models = []
    for line in data:
        weight, run = line.split()
        models.append((run, float(weight), 1))
    
    for concept in concepts:
        resultfile = os.path.join(simdir, newRunName, concept + ".txt")
        if checkToSkip(resultfile, overwrite):
            continue

        scorefile = os.path.join(simdir, models[0][0], concept + ".txt")
        if not os.path.exists(scorefile):
            print ("%s does not exist. skip" % scorefile)
            continue

        ranklist = readRankingResults(scorefile)
        names = sorted([x[0] for x in ranklist])

        nr_of_images = len(names)
        name2index = dict(zip(names, range(nr_of_images)))
   
        print('%s %d' % (concept, nr_of_images))

        scoreTable = readImageScoreTable(concept, name2index, simdir, models, torank=options.torank)
        assert scoreTable.shape[1] == nr_of_images

        weights = [model[1] for model in models]

        scores = np.matrix(weights) * scoreTable
        scores = [float(scores[0, k]) for k in range(nr_of_images)]

        newranklist = [(names[i], scores[i]) for i in range(nr_of_images)]
        newranklist.sort(key=lambda v: (v[1], v[0]), reverse=True)

        writeRankingResults(newranklist, resultfile)
Example #7
def submit(searchers,
           collection,
           annotationName,
           rootpath=ROOT_PATH,
           overwrite=0):
    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    nr_of_runs = len(searchers)

    for concept in concepts:
        for j in range(nr_of_runs):
            resultfile = os.path.join(searchers[j].getOutputdir(),
                                      concept + ".txt")
            if checkToSkip(resultfile, overwrite):
                continue
            searchresults = searchers[j].scoreCollection(concept)
            print("%s: %s %d -> %s" %
                  (searchers[j].name, concept, len(searchresults), resultfile))
            writeRankingResults(searchresults, resultfile)

    printStatus('%s.submit' % os.path.basename(__file__), "done")
Example #8
def process(options, collection, annotationName, runfile):
    rootpath = options.rootpath
    overwrite = options.overwrite

    resultdir = os.path.join(rootpath, collection, 'SimilarityIndex',
                             collection)

    apscorer = getScorer('AP')
    datafiles = [
        x.strip() for x in open(runfile).readlines()
        if x.strip() and not x.strip().startswith('#')
    ]
    nr_of_runs = len(datafiles)

    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    nr_of_concepts = len(concepts)

    printStatus(INFO, 'read annotations from files')

    name2label = [{} for i in range(nr_of_concepts)]
    hit_imgset = [[] for i in range(nr_of_concepts)]

    for i in range(nr_of_concepts):
        names, labels = readAnnotationsFrom(collection,
                                            annotationName,
                                            concepts[i],
                                            skip_0=False,
                                            rootpath=rootpath)
        names = list(map(int, names))
        name2label[i] = dict(zip(names, labels))

        label_file = os.path.join(rootpath, collection, 'tagged,lemm',
                                  '%s.txt' % concepts[i])
        try:
            hit_imgset[i] = set(map(int, open(label_file).readlines()))
        except (IOError, ValueError):  # missing or malformed label file
            hit_imgset[i] = set()
        printStatus(
            INFO, 'readLabeledImageSet for %s-%s -> %d hits' %
            (collection, concepts[i], len(hit_imgset[i])))

    ap_table = np.zeros((nr_of_runs, nr_of_concepts))
    ap2_table = np.zeros((nr_of_runs, nr_of_concepts))

    for run_idx in range(nr_of_runs):
        runName = os.path.splitext(os.path.basename(datafiles[run_idx]))[0]
        data = pickle.load(open(datafiles[run_idx], 'rb'))
        scores = data['scores']
        assert scores.shape[1] == nr_of_concepts
        imset = data['id_images']
        nr_of_images = len(imset)
        #print datafiles[run_idx], imset[:5], imset[-5:]

        for c_idx in range(nr_of_concepts):
            ground_truth = name2label[c_idx]
            ranklist = sorted(zip(imset, scores[:, c_idx]),
                              key=lambda v: (v[1], str(v[0])),
                              reverse=True)
            ranklist = [x for x in ranklist if x[0] in ground_truth]

            resfile = os.path.join(resultdir, runName,
                                   '%s.txt' % concepts[c_idx])
            if not checkToSkip(resfile, overwrite):
                writeRankingResults(ranklist, resfile)
            sorted_labels = [ground_truth[x[0]] for x in ranklist]
            assert len(sorted_labels) > 0
            #print concepts[c_idx], ranklist[:5], sorted_labels[:5]

            ap_table[run_idx, c_idx] = apscorer.score(sorted_labels)

            resfile = os.path.join(resultdir, 'tagged,lemm', runName,
                                   '%s.txt' % concepts[c_idx])
            if not checkToSkip(resfile, overwrite):
                writeRankingResults(
                    [x for x in ranklist if x[0] in hit_imgset[c_idx]],
                    resfile)

            sorted_labels = [
                ground_truth[x[0]] for x in ranklist
                if x[0] in hit_imgset[c_idx]
            ]
            ap2_table[run_idx, c_idx] = apscorer.score(sorted_labels)

    print('#' * 100)
    print('# untagged-concept', ' '.join(
        [os.path.basename(x) for x in datafiles]))
    print('#' * 100)

    for c_idx in range(nr_of_concepts):
        print(concepts[c_idx], ' '.join(
            ['%.3f' % x for x in ap_table[:, c_idx]]))
    print('meanAP', ' '.join(['%.3f' % x for x in ap_table.mean(axis=1)]))

    print('#' * 100)
    print('# tagged-concept')
    print('#' * 100)

    for c_idx in range(nr_of_concepts):
        print(concepts[c_idx], ' '.join(
            ['%.3f' % x for x in ap2_table[:, c_idx]]))
    print('meanAP2', ' '.join(['%.3f' % x for x in ap2_table.mean(axis=1)]))
Example #9
def process(options, collection, annotationName, runfile):
    rootpath = options.rootpath
    overwrite = options.overwrite

    resultdir = os.path.join(rootpath, collection, 'SimilarityIndex', collection)

    apscorer = getScorer('AP')
    datafiles = [x.strip() for x in open(runfile).readlines() if x.strip() and not x.strip().startswith('#')]
    nr_of_runs = len(datafiles)
    
    concepts = readConcepts(collection, annotationName, rootpath=rootpath)  
    nr_of_concepts = len(concepts)
    
    printStatus(INFO, 'read annotations from files')
    
    name2label = [{} for i in range(nr_of_concepts)]
    hit_imgset = [[] for i in range(nr_of_concepts)]
    
    for i in range(nr_of_concepts):
        names, labels = readAnnotationsFrom(collection, annotationName, concepts[i], skip_0=False, rootpath=rootpath)
        names = list(map(int, names))
        name2label[i] = dict(zip(names, labels))

        label_file = os.path.join(rootpath, collection, 'tagged,lemm', '%s.txt' % concepts[i])
        try:
            hit_imgset[i] = set(map(int, open(label_file).readlines()))
        except (IOError, ValueError):  # missing or malformed label file
            hit_imgset[i] = set()
        printStatus(INFO, 'readLabeledImageSet for %s-%s -> %d hits' % (collection, concepts[i], len(hit_imgset[i])))
        
    ap_table = np.zeros((nr_of_runs, nr_of_concepts))
    ap2_table = np.zeros((nr_of_runs, nr_of_concepts))
    
    for run_idx in range(nr_of_runs):
        runName = os.path.splitext(os.path.basename(datafiles[run_idx]))[0]
        data = pickle.load(open(datafiles[run_idx], 'rb'))
        scores = data['scores']
        assert scores.shape[1] == nr_of_concepts
        imset = data['id_images']
        nr_of_images = len(imset)
        #print datafiles[run_idx], imset[:5], imset[-5:]
                   
        for c_idx in range(nr_of_concepts):
            ground_truth = name2label[c_idx]
            ranklist = sorted(zip(imset, scores[:, c_idx]), key=lambda v: (v[1], str(v[0])), reverse=True)
            ranklist = [x for x in ranklist if x[0] in ground_truth]

            resfile = os.path.join(resultdir, runName, '%s.txt' % concepts[c_idx])
            if not checkToSkip(resfile, overwrite):
                writeRankingResults(ranklist, resfile)            
            sorted_labels = [ground_truth[x[0]] for x in ranklist]
            assert len(sorted_labels) > 0
            #print concepts[c_idx], ranklist[:5], sorted_labels[:5]

            ap_table[run_idx, c_idx] = apscorer.score(sorted_labels)

            resfile = os.path.join(resultdir, 'tagged,lemm', runName, '%s.txt' % concepts[c_idx])
            if not checkToSkip(resfile, overwrite):
                writeRankingResults([x for x in ranklist if x[0] in hit_imgset[c_idx]], resfile)            
            
            sorted_labels = [ground_truth[x[0]] for x in ranklist if x[0] in hit_imgset[c_idx]]
            ap2_table[run_idx, c_idx] = apscorer.score(sorted_labels)
     

    print('#' * 100)
    print('# untagged-concept', ' '.join([os.path.basename(x) for x in datafiles]))
    print('#' * 100)

    for c_idx in range(nr_of_concepts):
        print(concepts[c_idx], ' '.join(['%.3f' % x for x in ap_table[:, c_idx]]))
    print('meanAP', ' '.join(['%.3f' % x for x in ap_table.mean(axis=1)]))

    print('#' * 100)
    print('# tagged-concept')
    print('#' * 100)

    for c_idx in range(nr_of_concepts):
        print(concepts[c_idx], ' '.join(['%.3f' % x for x in ap2_table[:, c_idx]]))
    print('meanAP2', ' '.join(['%.3f' % x for x in ap2_table.mean(axis=1)]))
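Note: for orientation, average precision over a ranked list of binary relevance labels can be sketched as below; getScorer('AP') is assumed to compute something equivalent, though its exact tie and label conventions may differ:

def average_precision(sorted_labels):
    """AP over ranked binary labels, where a positive label means relevant."""
    hits, sum_prec = 0, 0.0
    for rank, label in enumerate(sorted_labels, start=1):
        if label > 0:
            hits += 1
            sum_prec += hits / float(rank)
    return sum_prec / hits if hits else 0.0

print(average_precision([1, -1, 1, -1]))  # (1/1 + 2/3) / 2 = 0.833...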
Example #10
    print('training %s' % tag)
    from model_based.svms.fastlinear.liblinear193.python.liblinearutil import train
    from model_based.svms.fastlinear.fastlinear import liblinear_to_fastlinear
    # -s 2: L2-regularized L2-loss SVC (primal), -B -1: no bias term, -q: quiet
    svm_params = '-s 2 -B -1 -q'
    model = train(y, vectors, svm_params)
    fastmodel = liblinear_to_fastlinear([model], [1.0], feat_dim)

    # optionally save the learned model to disk
    from model_based.svms.fastlinear.fastlinear import fastlinear_save_model
    model_dir = os.path.join(rootpath, trainCollection, 'Models',
                             trainAnnotationName, feature, 'fastlinear')
    model_filename = os.path.join(model_dir, '%s.model' % tag)

    from basic.common import makedirsforfile
    makedirsforfile(model_filename)
    fastlinear_save_model(model_filename, fastmodel)

    print('applying %s' % tag)
    from model_based.svms.mlengine_util import classify_large_data
    ranklist = classify_large_data(fastmodel, test_imset, test_feat_file)
    #predict_scores = [fastmodel.predict(x) for x in test_vectors]
    #ranklist = sorted(zip(test_renamed, predict_scores), key=lambda v:(v[1],v[0]), reverse=True)

    from basic.common import writeRankingResults
    simdir = os.path.join(rootpath, testCollection, 'SimilarityIndex',
                          testCollection, trainCollection,
                          'conceptsmm15tut.txt', '%s,fastlinear' % feature)
    resultfile = os.path.join(simdir, '%s.txt' % tag)
    writeRankingResults(ranklist, resultfile)
Example #11
def process(options, trainCollection, testCollection, feature):
    rootpath = options.rootpath
    k = options.k
    distance = options.distance
    blocksize = options.blocksize
    uniqueUser = options.uu
    numjobs = options.numjobs
    job = options.job
    overwrite = options.overwrite
    testset = options.testset
    if not testset:
        testset = testCollection

    searchMethod = distance + 'knn'
    if uniqueUser:
        searchMethod += ",uu"
        tagfile = os.path.join(rootpath, trainCollection, 'TextData', 'id.userid.lemmtags.txt')
        im2user = {}
        for line in open(tagfile):
            im, userid, tags = line.split('\t')
            im2user[im] = userid
    
    resultdir = os.path.join(rootpath, testCollection, "SimilarityIndex", testset, trainCollection, "%s,%s,%d" % (feature,searchMethod,k))
    feat_dir = os.path.join(rootpath, trainCollection, 'FeatureData', feature)
    id_file = os.path.join(feat_dir, 'id.txt')
    shape_file = os.path.join(feat_dir, 'shape.txt')
    nr_of_images, feat_dim = map(int, open(shape_file).readline().split())
    # the id file is authoritative for the image count; shape.txt only supplies feat_dim
    nr_of_images = len(open(id_file).readline().strip().split())
    searcher = imagesearch.load_model(os.path.join(feat_dir, 'feature.bin'), feat_dim, nr_of_images, id_file)
    searcher.set_distance(distance)
        
    workingSet = readImageSet(testCollection, testset, rootpath=rootpath)
    workingSet = [workingSet[i] for i in range(len(workingSet)) if (i % numjobs + 1) == job]
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, len(workingSet), resultdir))
    
    test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData', feature)
    test_feat_file = BigFile(test_feat_dir)

    read_time = 0
    knn_time = 0
    start = 0
    done = 0
    filtered = 0

    while start < len(workingSet):
        end = min(len(workingSet), start + blocksize)
        printStatus(INFO, 'processing images from %d to %d' % (start, end-1))

        s_time = time.time()
        renamed, vectors = test_feat_file.read(workingSet[start:end])
        read_time += time.time() - s_time
        nr_images = len(renamed)
        
        s_time = time.time()
        for i in range(nr_images):
            resultfile = os.path.join(resultdir, renamed[i][-2:], '%s.txt' % renamed[i])
            if checkToSkip(resultfile, overwrite):
                continue
            knn = searcher.search_knn(vectors[i], max_hits=max(3000, k * 3))
            if uniqueUser:
                removed, newknn = unique_user_constraint(knn, im2user, k)
                filtered += removed
                knn = newknn
            else:
                knn = knn[:k]
            assert len(knn) >= k
            writeRankingResults(knn, resultfile)
            done += 1
        printStatus(INFO, 'job %d-%d: %d done, filtered neighbors %d' % (numjobs, job, done, filtered))
        start = end

    printStatus(INFO, 'job %d-%d: %d done, filtered neighbors %d' % (numjobs, job, done, filtered))
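Note: unique_user_constraint is not shown in this snippet. Judging from how it is called, and from the im2user map and the filtered counter, a plausible contract is "truncate the neighbor list to k while keeping at most one image per user". A hedged sketch, not the project's actual code:

def unique_user_constraint(knn, im2user, k):
    """Return (nr_removed, filtered_knn), keeping one neighbor per user, up to k."""
    seen_users = set()
    newknn = []
    removed = 0
    for im, score in knn:
        user = im2user.get(im, im)  # fall back to the image id if the user is unknown
        if user in seen_users:
            removed += 1
            continue
        seen_users.add(user)
        newknn.append((im, score))
        if len(newknn) == k:
            break
    return removed, newknn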
Example #12
    (renamed, vectors) = train_feat_file.read(names)
    y = [name2label[x] for x in renamed]
    
    print('training %s' % tag)
    from model_based.svms.fastlinear.liblinear193.python.liblinearutil import train
    from model_based.svms.fastlinear.fastlinear import liblinear_to_fastlinear
    svm_params = '-s 2 -B -1 -q'
    model = train(y, vectors, svm_params)
    fastmodel = liblinear_to_fastlinear([model], [1.0], feat_dim)

    # optionally save the learned model to disk
    from model_based.svms.fastlinear.fastlinear import fastlinear_save_model
    model_dir = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, 'fastlinear')
    model_filename = os.path.join(model_dir, '%s.model' % tag)
    
    from basic.common import makedirsforfile
    makedirsforfile(model_filename)
    fastlinear_save_model(model_filename, fastmodel)

    print('applying %s' % tag)
    from model_based.svms.mlengine_util import classify_large_data
    ranklist = classify_large_data(fastmodel, test_imset, test_feat_file)    
    #predict_scores = [fastmodel.predict(x) for x in test_vectors]
    #ranklist = sorted(zip(test_renamed, predict_scores), key=lambda v:(v[1],v[0]), reverse=True)
    
    from basic.common import writeRankingResults
    simdir = os.path.join(rootpath, testCollection, 'SimilarityIndex', testCollection, trainCollection, 'conceptsmm15tut.txt', '%s,fastlinear' % feature)
    resultfile = os.path.join(simdir, '%s.txt' % tag)
    writeRankingResults(ranklist, resultfile)
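Note: as a quick orientation to the LIBLINEAR API used above, a minimal self-contained train/predict round trip with toy data (the module path varies across LIBLINEAR distributions; the vendored copy above lives under model_based.svms.fastlinear.liblinear193.python.liblinearutil):

from liblinearutil import train, predict

# four toy examples in sparse {index: value} form, labels +1 / -1
y = [1, 1, -1, -1]
x = [{1: 0.9, 2: 0.1}, {1: 0.8, 2: 0.2}, {1: 0.1, 2: 0.9}, {1: 0.2, 2: 0.8}]

model = train(y, x, '-s 2 -B -1 -q')  # same solver settings as above
p_labels, p_acc, p_vals = predict(y, x, model)
print(p_labels)  # expected: [1.0, 1.0, -1.0, -1.0]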