Ejemplo n.º 1
def process(options, collection, feature):
    """Write per-tag id and shape files for the feature index of a collection.

    For every tag assigned to this job, the features of at most 1000 images
    from the tag's hit list are read, and the returned image ids and the
    feature matrix shape are written to ``id.txt`` and ``shape.txt`` under
    ``<rootpath>/<collection>/FeatureIndex/<feature>/<tag[:2]>/<tag>``.

    ``options`` must provide ``rootpath``, ``tpp``, ``numjobs``, ``job``
    (1-based) and ``overwrite``.
    """
    rootpath = options.rootpath
    tpp = options.tpp
    k = 1000  # options.k
    numjobs = options.numjobs
    job = options.job
    overwrite = options.overwrite

    feat_dir = os.path.join(rootpath, collection, "FeatureData", feature)
    feat_file = BigFile(feat_dir)
    hitlists = buildHitLists(collection, tpp, rootpath)
    printStatus(INFO, "nr of tags: %d" % len(hitlists))

    # Round-robin split of the sorted vocabulary over the jobs.
    vob = [tag for i, tag in enumerate(sorted(hitlists)) if i % numjobs == job - 1]
    printStatus(INFO, "working on %d-%d: %d tags" % (numjobs, job, len(vob)))

    for tag_idx, tag in enumerate(vob):
        resultdir = os.path.join(rootpath, collection, "FeatureIndex", feature, tag[:2], tag)
        binfile = os.path.join(resultdir, "feature.bin")
        if checkToSkip(binfile, overwrite):
            continue
        # NOTE(review): nothing visible in this function writes feature.bin
        # itself; confirm whether that write step is missing here.

        hitlist = hitlists[tag][:k]  # keep at most 1000 images per tag
        renamed, vecs = feat_file.read(hitlist)

        # The per-tag directory may not exist yet; tolerate a concurrent job
        # creating a shared parent directory at the same time.
        try:
            os.makedirs(resultdir)
        except OSError:
            if not os.path.isdir(resultdir):
                raise

        idfile = os.path.join(resultdir, "id.txt")
        with open(idfile, "w") as fw:
            fw.write(" ".join(renamed))

        shapefile = os.path.join(resultdir, "shape.txt")
        # Guard against an empty read, where vecs[0] does not exist.
        ndims = len(vecs[0]) if renamed else 0
        with open(shapefile, "w") as fw:
            fw.write("%d %d" % (len(renamed), ndims))

        if tag_idx % 1000 == 0:
            printStatus(INFO, "%d - %s, %d images" % (tag_idx, tag, len(hitlist)))
Ejemplo n.º 3
def process(options, trainCollection, annotationfile, feature, modelName):
    """Train and save one model per concept, averaged over annotation sets.

    ``annotationfile`` lists one annotation name per line (blank lines and
    lines starting with ``#`` are ignored). For every concept assigned to
    this job, a model is trained on each annotation set, compressed, and
    folded into a running average before being saved under
    ``<rootpath>/<trainCollection>/Models/<annotation file name>/<feature>/<modelName>``.

    ``modelName`` must be ``'fik'`` or ``'fastlinear'``. ``options`` must
    provide ``rootpath``, ``overwrite``, ``numjobs``, ``job`` (1-based) and,
    for ``'fik'``, ``nr_bins``.

    Returns 0 when a listed annotation file is missing or there is nothing
    to train; otherwise returns None.
    """
    assert(modelName in ['fik', 'fastlinear'])
    rootpath = options.rootpath
    autoweight = 1  # options.autoweight
    beta = 0.5
    C = 1
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job

    params = {'rootpath': rootpath, 'model': modelName}
    if 'fik' == modelName:
        from svms.fiksvm.svmutil import svm_train as train_model
        from svms.fiksvm.fiksvm import svm_to_fiksvm as compress_model
        from svms.fiksvm.fiksvm import fiksvm_save_model as save_model
        from svms.fiksvm.svm import KERNEL_TYPE

        nr_bins = options.nr_bins
        modelName += str(nr_bins)
        params['nr_bins'] = nr_bins
        minmax_file = os.path.join(rootpath, trainCollection, 'FeatureData', feature, 'minmax.txt')
        with open(minmax_file, 'r') as f:
            # Real lists rather than map(): map() is a one-shot iterator on Python 3.
            params['min_vals'] = [float(x) for x in f.readline().split()]
            params['max_vals'] = [float(x) for x in f.readline().split()]
    else:
        from svms.fastlinear.liblinear193.python.liblinearutil import train as train_model
        from svms.fastlinear.fastlinear import liblinear_to_fastlinear as compress_model
        from svms.fastlinear.fastlinear import fastlinear_save_model as save_model

    newAnnotationName = os.path.split(annotationfile)[-1]
    with open(annotationfile) as f:
        stripped = [line.strip() for line in f]
    trainAnnotationNames = [x for x in stripped if x and not x.startswith('#')]
    if not trainAnnotationNames:
        print('no annotation names found in %s' % annotationfile)
        return 0

    for annotationName in trainAnnotationNames:
        conceptfile = os.path.join(rootpath, trainCollection, 'Annotations', annotationName)
        if not os.path.exists(conceptfile):
            print('%s does not exist' % conceptfile)
            return 0

    concepts = readConcepts(trainCollection, trainAnnotationNames[0], rootpath=rootpath)

    resultdir = os.path.join(rootpath, trainCollection, 'Models', newAnnotationName, feature, modelName)
    todo = []
    for concept in concepts:
        resultfile = os.path.join(resultdir, concept + '.model')
        if not checkToSkip(resultfile, overwrite):
            todo.append(concept)
    # Round-robin split of the remaining concepts over the jobs.
    todo = [c for i, c in enumerate(todo) if i % numjobs == (job - 1)]
    printStatus(INFO, 'to process %d concepts: %s' % (len(todo), ' '.join(todo)))
    if not todo:
        return 0

    train_feat_file = BigFile(os.path.join(rootpath, trainCollection, 'FeatureData', feature))
    feat_dim = train_feat_file.ndims

    s_time = time.time()

    for concept in todo:
        assemble_model = None
        for t, annotationName in enumerate(trainAnnotationNames, 1):
            names, labels = readAnnotationsFrom(trainCollection, annotationName, concept, skip_0=True, rootpath=rootpath)
            name2label = dict(zip(names, labels))
            renamed, vectors = train_feat_file.read(names)
            Ys = [name2label[x] for x in renamed]

            if autoweight:
                # Per-class weights computed from the positive/negative label counts.
                nr_pos = sum(1 for lab in labels if lab == 1)
                nr_neg = sum(1 for lab in labels if lab == -1)
                wp = float(beta) * (nr_pos + nr_neg) / nr_pos
                wn = (1.0 - beta) * (nr_pos + nr_neg) / nr_neg
                svm_params = '-w1 %g -w-1 %g' % (wp * C, wn * C)
            else:
                svm_params = '-c %g' % C

            if modelName.startswith('fik'):
                svm_params += ' -s 0 -t %d' % KERNEL_TYPE.index("HI")
            else:
                svm_params += ' -s 2 -B -1 '

            g_t = train_model(Ys, vectors, svm_params + ' -q')
            new_model = compress_model([g_t], [1.0], feat_dim, params)
            if t == 1:
                assemble_model = new_model
            else:
                # Running average: after t models each one carries weight 1/t.
                assemble_model.add_fastsvm(new_model, 1 - 1.0 / t, 1.0 / t)

        new_model_file = os.path.join(resultdir, '%s.model' % concept)
        printStatus(INFO, 'save model to %s' % new_model_file)
        save_model(new_model_file, assemble_model)
        printStatus(INFO, '%s done' % concept)

    timecost = time.time() - s_time
    writeConceptsTo(concepts, trainCollection, newAnnotationName, rootpath)
    printStatus(INFO, 'done for %g concepts: %s' % (len(todo), ' '.join(todo)))
    printStatus(INFO, 'models stored at %s' % resultdir)
    printStatus(INFO, '%g seconds in total' % timecost)
Ejemplo n.º 4
def process(options, trainCollection, trainAnnotationName, feature):
    """Train and save one fastlinear model per concept.

    When ``options.best_param_dir`` is set, C and the probability parameters
    A and B are read per concept from ``<best_param_dir>/<concept>.txt``
    (first line, ``best_C=..., a=..., b=...``) and the model directory is
    named ``fastlinear-tuned``. Otherwise C=1, A=0, B=0 are used and the
    directory is ``fastlinear``.

    ``options`` must provide ``rootpath``, ``best_param_dir``, ``overwrite``,
    ``numjobs`` and ``job`` (1-based).

    Returns the number of concepts trained by this job (0 if none).
    Raises ValueError when a parameter file does not match the expected format.
    """
    import re
    p = re.compile(r'best_C=(?P<C>[\.\d]+),\sa=(?P<a>[\.\-\d]+),\sb=(?P<b>[\.\-\d]+)')

    rootpath = options.rootpath
    best_param_dir = options.best_param_dir
    overwrite = options.overwrite
    #autoweight = options.autoweight
    numjobs = options.numjobs
    job = options.job
    beta = 0.5
    modelName = 'fastlinear'
    if best_param_dir:
        modelName += '-tuned'
    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)
    resultdir = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName)
    todo = []
    for concept in concepts:
        resultfile = os.path.join(resultdir, concept + '.model')
        if not checkToSkip(resultfile, overwrite):
            todo.append(concept)
    # Round-robin split of the remaining concepts over the jobs.
    todo = [c for i, c in enumerate(todo) if i % numjobs == (job - 1)]
    if not todo:
        return 0

    printStatus(INFO, 'to process %d concepts: %s' % (len(todo), ' '.join(todo)))
    feat_file = BigFile(os.path.join(rootpath, trainCollection, 'FeatureData', feature))
    for concept in todo:
        if best_param_dir:
            param_file = os.path.join(best_param_dir, '%s.txt' % concept)
            with open(param_file) as f:
                first_line = f.readline().strip()
            m = p.search(first_line)
            if m is None:
                raise ValueError('cannot parse best parameters from %s: %r' % (param_file, first_line))
            C = float(m.group('C'))
            A = float(m.group('a'))
            B = float(m.group('b'))
        else:
            C = 1
            A = 0
            B = 0
        printStatus(INFO, '%s, C=%g, A=%g, B=%g' % (concept, C, A, B))
        model_file_name = os.path.join(resultdir, concept + '.model')
        names, labels = readAnnotationsFrom(trainCollection, trainAnnotationName, concept, skip_0=True, rootpath=rootpath)
        name2label = dict(zip(names, labels))
        renamed, vectors = feat_file.read(names)
        y = [name2label[x] for x in renamed]
        # Per-class weights computed from the positive/negative label counts.
        nr_pos = sum(1 for lab in labels if lab == 1)
        nr_neg = sum(1 for lab in labels if lab == -1)
        wp = float(beta) * (nr_pos + nr_neg) / nr_pos
        wn = (1.0 - beta) * (nr_pos + nr_neg) / nr_neg
        # no bias term added by setting "-B -1"
        svm_params = '-w1 %g -w-1 %g -s 2 -B -1 -q' % (wp * C, wn * C)
        model = liblinear_train(y, vectors, svm_params)
        newmodel = liblinear_to_fastlinear([model], [1.0], feat_file.ndims)
        newmodel.set_probAB(A, B)
        printStatus(INFO, '-> %s' % model_file_name)
        fastlinear_save_model(model_file_name, newmodel)

        # NOTE(review): a "reload the model file to do a simple check" step
        # was announced here, but no such code is present.

    return len(todo)
Ejemplo n.º 5
def process(options, testCollection, trainCollection, annotationName, feature):
    # Tag the images of a test set block by block with the tagger selected by
    # options.tagger, timing feature reading and prediction separately.
    # options supplies rootpath, k, distance, blocksize, donefile, numjobs,
    # job (1-based), overwrite, tagger, noise and testset (defaults to
    # testCollection when empty). Returns 0 when the result file is skipped.
    # NOTE(review): several statements below look truncated (missing `else:`
    # lines, closing brackets and call names), so this function does not
    # parse as written; the notes mark the visible gaps.
    rootpath = options.rootpath
    k = options.k
    distance = options.distance
    blocksize = options.blocksize
    donefile = options.donefile
    numjobs = options.numjobs
    job = options.job
    overwrite = options.overwrite
    taggerType = options.tagger
    noise = options.noise
    testset = options.testset
    if not testset:
        testset = testCollection

    modelName = taggerType
    if 'pretagvote' == taggerType and noise > 1e-3:
        modelName += '-noise%.2f' % noise
    if 'pqtagvote' == taggerType:
        nnName = "l2knn"
        # NOTE(review): as written this overwrites the value above and leaves
        # nnName undefined for other taggers; an `else:` seems to be missing.
        nnName = distance + "knn"
    # NOTE(review): the join below is never closed; its last path component
    # (the result file name) is not visible here.
    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testset,
                              trainCollection, annotationName, modelName,
                              '%s,%s,%d' % (feature, nnName, k),

    # With several jobs, each job writes to its own suffixed result file.
    if numjobs > 1:
        resultfile += ".%d.%d" % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    if donefile:
        # First whitespace-separated token of every non-blank line in donefile.
        doneset = set([x.split()[0] for x in open(donefile) if x.strip()])
        # NOTE(review): this reset and the dangling message below suggest a
        # missing `else:` and a missing call name/argument.
        doneset = set()
        INFO, "%d images have been done already, and they will be ignored" %

    workingSet = readImageSet(testCollection, testset, rootpath)
    workingSet = [x for x in workingSet if x not in doneset]
    # Round-robin split of the remaining images over the jobs.
    # NOTE(review): the closing bracket of this list is missing.
    workingSet = [
        workingSet[i] for i in range(len(workingSet))
        if (i % numjobs + 1) == job

    # NOTE(review): the next two calls are cut off after their first arguments.
    test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData',
    test_feat_file = BigFile(test_feat_dir)

    tagger = NAME_TO_TAGGER[taggerType](trainCollection,
    tagger.k = k
    tagger.noise = noise

        # NOTE(review): arguments of a status call whose name is missing.
        INFO, "working on %d-%d, %d test images -> %s" %
        (numjobs, job, len(workingSet), resultfile))

    fw = open(resultfile, "w")

    read_time = 0.0
    test_time = 0.0
    start = 0
    done = 0

    # Process the working set in blocks of `blocksize` images.
    while start < len(workingSet):
        end = min(len(workingSet), start + blocksize)
        printStatus(INFO, 'tagging images from %d to %d' % (start, end - 1))

        s_time = time.time()
        renamed, vectors = test_feat_file.read(workingSet[start:end])
        nr_images = len(renamed)
        read_time += time.time() - s_time

        s_time = time.time()
        output = [None] * nr_images
        for i in range(nr_images):
            tagvotes = tagger.predict(content=vectors[i],
                                      context='%s,%s' %
                                      (testCollection, renamed[i]))
            # One output line per image: "<id> <tag> <vote> <tag> <vote> ...".
            # NOTE(review): the closing brackets of this expression are missing,
            # and no visible code writes `output` to `fw` or closes `fw`.
            output[i] = '%s %s\n' % (renamed[i], " ".join([
                "%s %s" % (tag, niceNumber(vote, 6))
                for (tag, vote) in tagvotes
        test_time += time.time() - s_time
        start = end
        done += len(output)

        # NOTE(review): arguments of a status call whose name is missing.
        INFO, '%d images tagged, read time %g seconds, test time %g seconds' %
        (done, read_time, test_time))
Ejemplo n.º 6
def process(options, trainCollection, annotationfile, feature, modelName):
    """Train and save one model per concept, averaged over annotation sets.

    ``annotationfile`` lists one annotation name per line (blank lines and
    lines starting with ``#`` are ignored). For every concept assigned to
    this job, a model is trained on each annotation set, compressed, and
    folded into a running average before being saved under
    ``<rootpath>/<trainCollection>/Models/<annotation file name>/<feature>/<modelName>``.

    ``modelName`` must be ``'fik'`` or ``'fastlinear'``. ``options`` must
    provide ``rootpath``, ``overwrite``, ``numjobs``, ``job`` (1-based) and,
    for ``'fik'``, ``nr_bins``.

    Returns 0 when a listed annotation file is missing or there is nothing
    to train; otherwise returns None.
    """
    assert (modelName in ['fik', 'fastlinear'])
    rootpath = options.rootpath
    autoweight = 1  #options.autoweight
    beta = 0.5
    C = 1
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job

    params = {'rootpath': rootpath, 'model': modelName}

    if 'fik' == modelName:
        from svms.fiksvm.svmutil import svm_train as train_model
        from svms.fiksvm.fiksvm import svm_to_fiksvm as compress_model
        from svms.fiksvm.fiksvm import fiksvm_save_model as save_model
        from svms.fiksvm.svm import KERNEL_TYPE

        nr_bins = options.nr_bins
        modelName += str(nr_bins)
        params['nr_bins'] = nr_bins
        minmax_file = os.path.join(rootpath, trainCollection, 'FeatureData',
                                   feature, 'minmax.txt')
        with open(minmax_file, 'r') as f:
            # Real lists rather than map(): map() is a one-shot iterator on Python 3.
            params['min_vals'] = [float(x) for x in f.readline().split()]
            params['max_vals'] = [float(x) for x in f.readline().split()]
    else:
        from svms.fastlinear.liblinear193.python.liblinearutil import train as train_model
        from svms.fastlinear.fastlinear import liblinear_to_fastlinear as compress_model
        from svms.fastlinear.fastlinear import fastlinear_save_model as save_model

    newAnnotationName = os.path.split(annotationfile)[-1]
    with open(annotationfile) as f:
        stripped = [line.strip() for line in f]
    trainAnnotationNames = [
        x for x in stripped if x and not x.startswith('#')
    ]
    if not trainAnnotationNames:
        print('no annotation names found in %s' % annotationfile)
        return 0

    for annotationName in trainAnnotationNames:
        conceptfile = os.path.join(rootpath, trainCollection, 'Annotations',
                                   annotationName)
        if not os.path.exists(conceptfile):
            print('%s does not exist' % conceptfile)
            return 0

    concepts = readConcepts(trainCollection,
                            trainAnnotationNames[0],
                            rootpath=rootpath)

    resultdir = os.path.join(rootpath, trainCollection, 'Models',
                             newAnnotationName, feature, modelName)
    todo = []
    for concept in concepts:
        resultfile = os.path.join(resultdir, concept + '.model')
        if not checkToSkip(resultfile, overwrite):
            todo.append(concept)
    # Round-robin split of the remaining concepts over the jobs.
    todo = [c for i, c in enumerate(todo) if i % numjobs == (job - 1)]
    printStatus(INFO,
                'to process %d concepts: %s' % (len(todo), ' '.join(todo)))
    if not todo:
        return 0

    train_feat_file = BigFile(
        os.path.join(rootpath, trainCollection, 'FeatureData', feature))
    feat_dim = train_feat_file.ndims

    s_time = time.time()

    for concept in todo:
        assemble_model = None
        for t, annotationName in enumerate(trainAnnotationNames, 1):
            names, labels = readAnnotationsFrom(trainCollection,
                                                annotationName,
                                                concept,
                                                skip_0=True,
                                                rootpath=rootpath)
            name2label = dict(zip(names, labels))
            renamed, vectors = train_feat_file.read(names)
            Ys = [name2label[x] for x in renamed]

            if autoweight:
                # Per-class weights computed from the positive/negative label counts.
                nr_pos = sum(1 for lab in labels if lab == 1)
                nr_neg = sum(1 for lab in labels if lab == -1)
                wp = float(beta) * (nr_pos + nr_neg) / nr_pos
                wn = (1.0 - beta) * (nr_pos + nr_neg) / nr_neg
                svm_params = '-w1 %g -w-1 %g' % (wp * C, wn * C)
            else:
                svm_params = '-c %g' % C

            if modelName.startswith('fik'):
                svm_params += ' -s 0 -t %d' % KERNEL_TYPE.index("HI")
            else:
                svm_params += ' -s 2 -B -1 '

            g_t = train_model(Ys, vectors, svm_params + ' -q')
            new_model = compress_model([g_t], [1.0], feat_dim, params)
            if t == 1:
                assemble_model = new_model
            else:
                # Running average: after t models each one carries weight 1/t.
                assemble_model.add_fastsvm(new_model, 1 - 1.0 / t, 1.0 / t)

        new_model_file = os.path.join(resultdir, '%s.model' % concept)
        printStatus(INFO, 'save model to %s' % new_model_file)
        save_model(new_model_file, assemble_model)
        printStatus(INFO, '%s done' % concept)

    timecost = time.time() - s_time
    writeConceptsTo(concepts, trainCollection, newAnnotationName, rootpath)
    printStatus(INFO, 'done for %g concepts: %s' % (len(todo), ' '.join(todo)))
    printStatus(INFO, 'models stored at %s' % resultdir)
    printStatus(INFO, '%g seconds in total' % timecost)
Ejemplo n.º 7
def process(options, testCollection, trainCollection, trainAnnotationName,
            feature, modelName):
    # Apply the per-concept models stored under
    # <rootpath>/<trainCollection>/Models/<trainAnnotationName>/<feature>/<modelName>
    # to the images of testCollection, block by block.
    # options supplies rootpath, overwrite, prob_output, numjobs,
    # job (1-based) and blocksize. Returns the number of images processed,
    # or 0 when the result file is skipped or a model fails to load.
    # NOTE(review): several statements below look truncated (missing `else:`
    # lines, closing brackets and call names), so this function does not
    # parse as written; the notes mark the visible gaps.
    if modelName.startswith('fik'):
        from fiksvm.fiksvm import fiksvm_load_model as load_model
        # NOTE(review): as written this import always replaces the one above;
        # an `else:` seems to be missing.
        from fastlinear.fastlinear import fastlinear_load_model as load_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    prob_output = options.prob_output
    numjobs = options.numjobs
    job = options.job
    blocksize = options.blocksize

    outputName = '%s,%s' % (feature, modelName)
    if prob_output:
        outputName += ',prob'

    # NOTE(review): the join below is never closed; its last path component
    # (the result file name) is not visible here.
    resultfile = os.path.join(rootpath, testCollection, 'autotagging',
                              testCollection, trainCollection,
                              trainAnnotationName, outputName,
    # With several jobs, each job writes to its own suffixed result file.
    if numjobs > 1:
        resultfile += '.%d.%d' % (numjobs, job)

    if checkToSkip(resultfile, overwrite):
        return 0

    # NOTE(review): this call is cut off after its first argument.
    concepts = readConcepts(trainCollection,
    nr_of_concepts = len(concepts)

    test_imset = readImageSet(testCollection, testCollection, rootpath)
    # Round-robin split of the test images over the jobs.
    # NOTE(review): the closing bracket of this list is missing.
    test_imset = [
        test_imset[i] for i in range(len(test_imset)) if i % numjobs + 1 == job
    nr_of_test_images = len(test_imset)
        # NOTE(review): arguments of a status call whose name is missing.
        INFO, "working on %d-%d, %d test images -> %s" %
        (numjobs, job, nr_of_test_images, resultfile))

    # Load one model per concept; give up if any of them fails to load.
    models = [None] * nr_of_concepts
    for c in range(nr_of_concepts):
        model_file_name = os.path.join(rootpath, trainCollection, 'Models',
                                       trainAnnotationName, feature, modelName,
                                       '%s.model' % concepts[c])
        models[c] = load_model(model_file_name)
        if models[c] is None:
            return 0
        #(pA,pB) = model.get_probAB()

    feat_file = BigFile(
        os.path.join(rootpath, testCollection, "FeatureData", feature))
    fw = open(resultfile, "w")

    read_time = 0
    test_time = 0
    start = 0
    done = 0

    # Process the test images in blocks of `blocksize`.
    while start < nr_of_test_images:
        end = min(nr_of_test_images, start + blocksize)
        printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

        s_time = time.time()
        renamed, test_X = feat_file.read(test_imset[start:end])
        read_time += time.time() - s_time

        s_time = time.time()
        output = [None] * len(renamed)
        for i in xrange(len(renamed)):
            if prob_output:
                # NOTE(review): the scoring expression of this branch, the
                # closing brackets of both lists and an `else:` are missing.
                scores = [
                    for c in range(nr_of_concepts)
                scores = [
                    models[c].predict(test_X[i]) for c in range(nr_of_concepts)
            #dec_value = sigmoid_predict(dec_value, A=pA, B=pB)
            # Rank the concepts by score.
            # NOTE(review): the remaining arguments of sorted() are cut off.
            tagvotes = sorted(zip(concepts, scores),
                              key=lambda v: v[1],
            # One output line per image: "<id> <tag> <vote> <tag> <vote> ...".
            # NOTE(review): the closing brackets of this expression are missing,
            # and no visible code writes `output` to `fw` or closes `fw`.
            output[i] = '%s %s\n' % (renamed[i], " ".join([
                "%s %s" % (tag, niceNumber(vote, 6))
                for (tag, vote) in tagvotes
        test_time += time.time() - s_time
        start = end
        done += len(output)

    # done
        # NOTE(review): arguments of a status call whose name is missing.
        INFO, "%d done. read time %g seconds, test_time %g seconds" %
        (done, read_time, test_time))
    return done
Ejemplo n.º 10
def process(options, trainCollection, testCollection, feature):
    """Search k nearest training images for every test image and store them.

    For each test image assigned to this job, the neighbours found in
    ``trainCollection`` are written to
    ``<rootpath>/<testCollection>/SimilarityIndex/<testset>/<trainCollection>/<feature>,<searchMethod>,<k>/<id[-2:]>/<id>.txt``.
    With ``options.uu`` set, the neighbour list is passed through
    ``unique_user_constraint`` using the image-to-user mapping read from
    ``id.userid.lemmtags.txt``.

    ``options`` must provide ``rootpath``, ``k``, ``distance``, ``blocksize``,
    ``uu``, ``numjobs``, ``job`` (1-based), ``overwrite`` and ``testset``
    (defaults to ``testCollection`` when empty).
    """
    rootpath = options.rootpath
    k = options.k
    distance = options.distance
    blocksize = options.blocksize
    uniqueUser = options.uu
    numjobs = options.numjobs
    job = options.job
    overwrite = options.overwrite
    testset = options.testset
    if not testset:
        testset = testCollection

    searchMethod = distance + 'knn'
    if uniqueUser:
        searchMethod += ",uu"
        tagfile = os.path.join(rootpath, trainCollection, 'TextData', 'id.userid.lemmtags.txt')
        im2user = {}
        with open(tagfile) as f:
            for line in f:
                # Each line: image id, user id, tags (tab-separated).
                im, userid, _tags = line.split('\t')
                im2user[im] = userid

    resultdir = os.path.join(rootpath, testCollection, "SimilarityIndex", testset, trainCollection, "%s,%s,%d" % (feature, searchMethod, k))
    feat_dir = os.path.join(rootpath, trainCollection, 'FeatureData', feature)
    id_file = os.path.join(feat_dir, 'id.txt')
    shape_file = os.path.join(feat_dir, 'shape.txt')
    with open(shape_file) as f:
        _shape_nr_of_images, feat_dim = map(int, f.readline().split())
    # The image count is taken from the id file, as the original code did by
    # overwriting the value read from shape.txt.
    with open(id_file) as f:
        nr_of_images = len(f.readline().strip().split())
    searcher = imagesearch.load_model(os.path.join(feat_dir, 'feature.bin'), feat_dim, nr_of_images, id_file)

    workingSet = readImageSet(testCollection, testset, rootpath=rootpath)
    # Round-robin split of the test images over the jobs.
    workingSet = [im for i, im in enumerate(workingSet) if (i % numjobs + 1) == job]
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, len(workingSet), resultdir))
    test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData', feature)
    test_feat_file = BigFile(test_feat_dir)

    read_time = 0
    knn_time = 0
    start = 0
    done = 0
    filtered = 0

    while start < len(workingSet):
        end = min(len(workingSet), start + blocksize)
        printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

        s_time = time.time()
        renamed, vectors = test_feat_file.read(workingSet[start:end])
        read_time += time.time() - s_time
        nr_images = len(renamed)
        s_time = time.time()
        for i in range(nr_images):
            resultfile = os.path.join(resultdir, renamed[i][-2:], '%s.txt' % renamed[i])
            if checkToSkip(resultfile, overwrite):
                continue
            # Over-fetch so that enough neighbours remain after filtering.
            knn = searcher.search_knn(vectors[i], max_hits=max(3000, k * 3))
            if uniqueUser:
                removed, knn = unique_user_constraint(knn, im2user, k)
                filtered += removed
            else:
                knn = knn[:k]
            assert(len(knn) >= k)
            writeRankingResults(knn, resultfile)
            done += 1
        printStatus(INFO, 'job %d-%d: %d done, filtered neighbors %d' % (numjobs, job, done, filtered))
        start = end

    printStatus(INFO, 'job %d-%d: %d done, filtered neighbors %d' % (numjobs, job, done, filtered))
Ejemplo n.º 11

from model_based.dataengine.positiveengine import PositiveEngine
from model_based.dataengine.negativeengine import NegativeEngine

# Train one fastlinear model per test tag from 100 sampled positive and
# 100 sampled negative training examples.
# Imports and loop-invariant values are set up once, outside the loop.
from model_based.svms.fastlinear.liblinear193.python.liblinearutil import train
from model_based.svms.fastlinear.fastlinear import liblinear_to_fastlinear
from model_based.svms.fastlinear.fastlinear import fastlinear_save_model
from basic.common import makedirsforfile

pe = PositiveEngine(trainCollection)
ne = NegativeEngine(trainCollection)

svm_params = '-s 2 -B -1 -q'
model_dir = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, 'fastlinear')

for tag in test_tags:
    pos_set = pe.sample(tag, 100)
    neg_set = ne.sample(tag, 100)
    names = pos_set + neg_set
    labels = [1] * len(pos_set) + [-1] * len(neg_set)
    name2label = dict(zip(names, labels))
    (renamed, vectors) = train_feat_file.read(names)
    # Labels are re-aligned to the order of the ids returned by the read.
    y = [name2label[x] for x in renamed]
    print('training %s' % tag)
    model = train(y, vectors, svm_params)
    fastmodel = liblinear_to_fastlinear([model], [1.0], feat_dim)

    # optionally save the learned model to disk
    model_filename = os.path.join(model_dir, '%s.model' % tag)
    # NOTE(review): the save helpers are imported above, but no call that
    # writes `fastmodel` to `model_filename` is present in this fragment.
Ejemplo n.º 12
def process(options, testCollection, trainCollection, feature):
    # Estimate tag votes for the images of testCollection with a TagRanking
    # built on trainCollection, in blocks of 100 images.
    # options supplies rootpath, overwrite, tpp, numjobs, job (1-based) and
    # donefile.
    # NOTE(review): several statements below look truncated (missing bodies,
    # closing brackets and call names), so this function does not parse as
    # written; the notes mark the visible gaps.
    rootpath = options.rootpath
    overwrite = options.overwrite
    tpp = options.tpp
    doRandomwalk = 1  #options.doRandomwalk
    uniqueUser = 0  #options.uniqueUser
    k = 1000  #options.k
    numjobs = options.numjobs
    job = options.job

    #resultfile = os.path.join(rootpath, testCollection, "tagrel", testCollection, trainCollection,
    #                          "%s,tagrank%d%d,%d,%s" % (feature,doRandomwalk,uniqueUser,k,tpp), "id.tagvotes.txt")

    # NOTE(review): the join below is never closed; its last path component
    # (the result file name) is not visible here.
    resultfile = os.path.join(rootpath, testCollection, "tagrel",
                              testCollection, trainCollection,
                              '%s,tagrank,%s' % (feature, tpp),

    # With several jobs, each job writes to its own suffixed result file.
    if numjobs > 1:
        resultfile = resultfile + '.%d.%d' % (numjobs, job)

    # NOTE(review): the body of this `if` is missing, and the two doneset
    # assignments below look like the branches of a lost try/except or
    # if/else; as written the second always wins.
    if checkToSkip(resultfile, overwrite):

        # First token of every line of options.donefile except the last line.
        doneset = set(
            [str.split(x)[0] for x in open(options.donefile).readlines()[:-1]])
        doneset = set()

    printStatus(INFO, "done set: %d" % len(doneset))

    testImageSet = readImageSet(testCollection, testCollection, rootpath)
    testImageSet = [x for x in testImageSet if x not in doneset]
    # Round-robin split of the remaining images over the jobs.
    # NOTE(review): the closing bracket of this list and the name of the
    # status call that follows are missing.
    testImageSet = [
        testImageSet[i] for i in range(len(testImageSet))
        if (i % numjobs + 1) == job
        INFO, 'working on %d-%d, %d test images -> %s' %
        (numjobs, job, len(testImageSet), resultfile))

    testreader = TagReader(testCollection, rootpath=rootpath)
    test_feat_file = BigFile(
        os.path.join(rootpath, testCollection, 'FeatureData', feature))
    block_size = 100

    # NOTE(review): this constructor call is cut off after its first argument.
    tagranking = TagRanking(trainCollection,

    fw = open(resultfile, "w")

    done = 0

    # Number of blocks, rounded up.
    # NOTE(review): `/` yields an int only on Python 2; range() below needs an int.
    nr_of_blocks = len(testImageSet) / block_size
    if nr_of_blocks * block_size < len(testImageSet):
        nr_of_blocks += 1

    for block_index in range(nr_of_blocks):
        start = block_index * block_size
        end = min(len(testImageSet), start + block_size)
        subset = testImageSet[start:end]
        # NOTE(review): the body of this `if` is missing.
        if not subset:
        renamed, features = test_feat_file.read(subset)
        printStatus(INFO, '%d - %d: %d images' % (start, end, len(subset)))

        output = []
        for i in range(len(renamed)):
            qry_id = renamed[i]
            qry_tags = testreader.get(qry_id)
            qry_vec = features[i]
            # NOTE(review): qry_vec is not passed to estimate() as written.
            tagvotes = tagranking.estimate(
                qry_tags)  #, uniqueUser=uniqueUser, doRandomwalk=doRandomwalk)
            # One output line per image: "<id> <tag> <vote> <tag> <vote> ...".
            newline = "%s %s" % (qry_id, " ".join(
                ["%s %g" % (x[0], x[1]) for x in tagvotes]))
            output.append(newline + "\n")
            done += 1

        # NOTE(review): no visible code writes `output` to `fw` or closes `fw`.
        #printStatus(INFO, '%d %s %s' % (done,qry_id,' '.join(['%s:%g' % (x[0],x[1]) for x in tagvotes[:3]] )))

    printStatus(INFO, 'done')
