Exemple #1
0
def process(options, testCollection, trainCollection, tagsimMethod):
    """Write tag-vote lines for the test images of ``testCollection``.

    For each selected test image, its tags are fetched through a
    ``TagReader``, scored by the estimator registered in ``SIM_TO_TAGREL``
    under ``tagsimMethod``, and written to the result file as one line:
    ``<image id> <tag> <vote> <tag> <vote> ...``.

    Args:
        options: Options object. Reads ``rootpath``, ``overwrite``,
            ``testset``, ``numjobs``, ``job`` and (best effort) ``donefile``.
        testCollection: Collection whose image set and tags are read.
        trainCollection: Collection name handed to the estimator; also part
            of the result path unless ``tagsimMethod`` is ``'wns'``.
        tagsimMethod: Key into ``SIM_TO_TAGREL``. ``'wns'`` additionally
            passes ``"wup"`` to the estimator constructor.

    Side effects:
        Calls ``sys.exit(0)`` when ``checkToSkip(resultfile, overwrite)``
        returns a true value; otherwise creates and writes the result file.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    testsetName = options.testset if options.testset else testCollection
    numjobs = options.numjobs
    job = options.job
    useWnVob = 1

    outputName = tagsimMethod + '-wn' if useWnVob else tagsimMethod

    # The 'wns' method does not include the training collection in its path.
    path_parts = [rootpath, testCollection, "tagrel", testsetName]
    if tagsimMethod != 'wns':
        path_parts.append(trainCollection)
    path_parts.extend([outputName, 'id.tagvotes.txt'])
    resultfile = os.path.join(*path_parts)
    if numjobs > 1:
        resultfile = resultfile.replace("id.tagvotes.txt", "id.tagvotes.%d.%d.txt" % (numjobs, job))

    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    makedirsforfile(resultfile)

    # Best effort: any problem reading the done file (missing option, missing
    # file, malformed line) falls back to an empty done set. The last line of
    # the file is deliberately ignored, as in the original implementation.
    try:
        with open(options.donefile) as donefile:
            doneset = set(str.split(line)[0] for line in donefile.readlines()[:-1])
    except Exception:
        doneset = set()

    printStatus(INFO, "done set: %d" % len(doneset))

    testImageSet = readImageSet(testCollection, testCollection, rootpath)
    testImageSet = [x for x in testImageSet if x not in doneset]
    # Job numbers are 1-based: job j takes every numjobs-th image starting at j-1.
    testImageSet = [x for i, x in enumerate(testImageSet) if (i % numjobs + 1) == job]
    printStatus(INFO, 'working on %d-%d, %d test images -> %s' % (numjobs, job, len(testImageSet), resultfile))

    testreader = TagReader(testCollection, rootpath=rootpath)

    if tagsimMethod == "wns":
        tagrel = SIM_TO_TAGREL["wns"](trainCollection, useWnVob, "wup", rootpath)
    else:
        tagrel = SIM_TO_TAGREL[tagsimMethod](trainCollection, useWnVob, rootpath)

    done = 0
    # The context manager guarantees the file is closed even if estimation fails.
    with open(resultfile, "w") as fw:
        for qry_id in testImageSet:
            qry_tags = testreader.get(qry_id)
            tagvotes = tagrel.estimate(qry_tags)
            newline = qry_id + " " + " ".join(["%s %s" % (tag, niceNumber(vote, 8)) for (tag, vote) in tagvotes])
            fw.write(newline + "\n")
            done += 1
            if done % 1000 == 0:
                printStatus(INFO, "%d done" % done)
    printStatus(INFO, "%d done" % done)
Exemple #2
0
def process(options, collection):
    """Write position-based tag votes for every image in ``collection``.

    Each image's tags are de-duplicated in order of first appearance and the
    tag at position ``i`` (0-based) out of ``n`` distinct tags receives the
    vote ``1.0 - i / n``. One line per image is written to the result file:
    ``<image id> <tag> <vote> <tag> <vote> ...``.

    Args:
        options: Options object. Reads ``rootpath``, ``tpp`` and
            ``overwrite``.
        collection: Collection whose image set and tags are read; also used
            to build the result path.

    Side effects:
        Calls ``sys.exit(0)`` when ``checkToSkip(resultfile, overwrite)``
        returns a true value; otherwise creates and writes the result file.
    """
    rootpath = options.rootpath
    tpp = options.tpp
    overwrite = options.overwrite

    resultfile = os.path.join(rootpath, collection, "tagrel", collection,
                              'tagpos,%s' % tpp, 'id.tagvotes.txt')
    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    imset = readImageSet(collection, collection, rootpath)
    printStatus(INFO,
                'working on %d test images -> %s' % (len(imset), resultfile))

    reader = TagReader(collection, tpp=tpp, rootpath=rootpath)

    makedirsforfile(resultfile)
    flush_every = 10000  # number of buffered lines that triggers a write
    output = []
    done = 0

    # The context manager guarantees the file is closed even if a read fails.
    with open(resultfile, "w") as fw:
        for im in imset:
            tags = reader.get(im)

            # Keep only the first occurrence of each tag, preserving order.
            tagSet = set()
            tagSeq = []
            for tag in str.split(tags):
                if tag not in tagSet:
                    tagSeq.append(tag)
                    tagSet.add(tag)

            nr_tags = len(tagSeq)
            # An image without tags yields an empty list (no division occurs).
            tagvotes = [(tag, 1.0 - float(i) / nr_tags)
                        for i, tag in enumerate(tagSeq)]
            newline = "%s %s" % (im, " ".join(
                ["%s %g" % (x[0], x[1]) for x in tagvotes]))
            output.append(newline + "\n")
            done += 1

            if len(output) % flush_every == 0:
                printStatus(
                    INFO, '%d %s %s' % (done, im, ' '.join(
                        ['%s:%g' % (x[0], x[1]) for x in tagvotes[:3]])))
                fw.write("".join(output))
                fw.flush()
                output = []

        if output:
            fw.write("".join(output))
    printStatus(INFO, 'done')
Exemple #3
0
def process(options, collection):
    """Score the tags of each image in ``collection`` by their position.

    Duplicate tags are dropped (first occurrence wins). With ``n`` distinct
    tags, the tag at 0-based position ``i`` gets the vote ``1.0 - i / n``.
    Results go to the result file, one line per image, in the form
    ``<image id> <tag> <vote> <tag> <vote> ...``.

    Args:
        options: Options object. Reads ``rootpath``, ``tpp`` and
            ``overwrite``.
        collection: Collection whose image set and tags are read; also used
            to build the result path.

    Side effects:
        Calls ``sys.exit(0)`` when ``checkToSkip(resultfile, overwrite)``
        returns a true value; otherwise creates and writes the result file.
    """
    rootpath = options.rootpath
    tpp = options.tpp
    overwrite = options.overwrite

    resultfile = os.path.join(rootpath, collection, "tagrel", collection, 'tagpos,%s'%tpp, 'id.tagvotes.txt')
    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    imset = readImageSet(collection, collection, rootpath)
    printStatus(INFO, 'working on %d test images -> %s' % (len(imset),resultfile))

    reader = TagReader(collection,tpp=tpp,rootpath=rootpath)

    makedirsforfile(resultfile)
    batch_size = 10000  # buffered lines written out in one go
    pending = []
    done = 0

    # "with" closes the file on every exit path, including exceptions.
    with open(resultfile, "w") as fw:
        for im in imset:
            seen = set()
            ordered_tags = []
            for tag in str.split(reader.get(im)):
                if tag in seen:
                    continue
                seen.add(tag)
                ordered_tags.append(tag)

            nr_tags = len(ordered_tags)
            # Empty tag lists produce no votes, so nr_tags is never a zero divisor.
            tagvotes = [(tag, 1.0-float(rank)/nr_tags) for rank, tag in enumerate(ordered_tags)]
            newline = "%s %s" % (im, " ".join(["%s %g" % (x[0],x[1]) for x in tagvotes]))
            pending.append(newline + "\n")
            done += 1

            if len(pending) % batch_size == 0:
                printStatus(INFO, '%d %s %s' % (done,im,' '.join(['%s:%g' % (x[0],x[1]) for x in tagvotes[:3]] )))
                fw.write("".join(pending))
                fw.flush()
                pending = []

        if pending:
            fw.write("".join(pending))
    printStatus(INFO, 'done')
Exemple #4
0
def process(options, testCollection, trainCollection, feature):
    """Write tag-ranking votes for the test images of ``testCollection``.

    Test images are processed in blocks of 100. For each block the feature
    vectors are read from a ``BigFile``; every returned image is scored with
    ``TagRanking.estimate(feature_vector, tags)`` and written as one line:
    ``<image id> <tag> <vote> <tag> <vote> ...``.

    Args:
        options: Options object. Reads ``rootpath``, ``overwrite``, ``tpp``,
            ``numjobs``, ``job`` and (best effort) ``donefile``.
        testCollection: Collection whose image set, tags and features are
            read.
        trainCollection: Collection name handed to ``TagRanking``.
        feature: Feature name; selects the feature directory and is part of
            the result path.

    Side effects:
        Calls ``sys.exit(0)`` when ``checkToSkip(resultfile, overwrite)``
        returns a true value; otherwise creates and writes the result file.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    tpp = options.tpp
    k = 1000  # hard-coded; the original noted options.k as the intended source
    numjobs = options.numjobs
    job = options.job

    resultfile = os.path.join(rootpath, testCollection, "tagrel",
                              testCollection, trainCollection,
                              '%s,tagrank,%s' % (feature, tpp),
                              'id.tagvotes.txt')

    if numjobs > 1:
        resultfile = resultfile + '.%d.%d' % (numjobs, job)

    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    # Best effort: any problem reading the done file (missing option, missing
    # file, malformed line) falls back to an empty done set. The last line of
    # the file is deliberately ignored, as in the original implementation.
    try:
        with open(options.donefile) as donefile:
            doneset = set(
                str.split(line)[0] for line in donefile.readlines()[:-1])
    except Exception:
        doneset = set()

    printStatus(INFO, "done set: %d" % len(doneset))

    testImageSet = readImageSet(testCollection, testCollection, rootpath)
    testImageSet = [x for x in testImageSet if x not in doneset]
    # Job numbers are 1-based: job j takes every numjobs-th image starting at j-1.
    testImageSet = [
        x for i, x in enumerate(testImageSet) if (i % numjobs + 1) == job
    ]
    printStatus(
        INFO, 'working on %d-%d, %d test images -> %s' %
        (numjobs, job, len(testImageSet), resultfile))

    testreader = TagReader(testCollection, rootpath=rootpath)
    test_feat_file = BigFile(
        os.path.join(rootpath, testCollection, 'FeatureData', feature))
    block_size = 100

    tagranking = TagRanking(trainCollection,
                            feature=feature,
                            k=k,
                            rootpath=rootpath)

    makedirsforfile(resultfile)

    # The context manager guarantees the file is closed even if scoring fails.
    with open(resultfile, "w") as fw:
        # A stepped range replaces the manual block count, which relied on
        # "/" being integer division and broke range() on Python 3.
        for start in range(0, len(testImageSet), block_size):
            end = min(len(testImageSet), start + block_size)
            subset = testImageSet[start:end]
            renamed, features = test_feat_file.read(subset)
            printStatus(INFO, '%d - %d: %d images' % (start, end, len(subset)))

            output = []
            for i, qry_id in enumerate(renamed):
                qry_tags = testreader.get(qry_id)
                qry_vec = features[i]
                tagvotes = tagranking.estimate(qry_vec, qry_tags)
                newline = "%s %s" % (qry_id, " ".join(
                    ["%s %g" % (x[0], x[1]) for x in tagvotes]))
                output.append(newline + "\n")

            # One write per block keeps partial results on disk.
            fw.write("".join(output))
            fw.flush()

    printStatus(INFO, 'done')
Exemple #5
0
def process(options, testCollection, trainCollection, feature):
    """Rank the tags of each test image and write the votes to a file.

    The selected test images are handled 100 at a time: their feature
    vectors are read from a ``BigFile``, each image is scored through
    ``TagRanking.estimate(feature_vector, tags)``, and the block's lines
    (``<image id> <tag> <vote> <tag> <vote> ...``) are written and flushed.

    Args:
        options: Options object. Reads ``rootpath``, ``overwrite``, ``tpp``,
            ``numjobs``, ``job`` and (best effort) ``donefile``.
        testCollection: Collection whose image set, tags and features are
            read.
        trainCollection: Collection name handed to ``TagRanking``.
        feature: Feature name; selects the feature directory and is part of
            the result path.

    Side effects:
        Calls ``sys.exit(0)`` when ``checkToSkip(resultfile, overwrite)``
        returns a true value; otherwise creates and writes the result file.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    tpp = options.tpp
    k = 1000 # hard-coded; the original noted options.k as the intended source
    numjobs = options.numjobs
    job = options.job

    resultfile = os.path.join(rootpath, testCollection, "tagrel", testCollection, trainCollection, '%s,tagrank,%s' % (feature,tpp), 'id.tagvotes.txt')

    if numjobs>1:
        resultfile = resultfile + '.%d.%d' % (numjobs, job)

    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    # Reading the done file is best effort: on any failure nothing is treated
    # as done. The file's last line is intentionally skipped, as before.
    try:
        with open(options.donefile) as done_fh:
            doneset = set(str.split(entry)[0] for entry in done_fh.readlines()[:-1])
    except Exception:
        doneset = set()

    printStatus(INFO, "done set: %d" % len(doneset))

    testImageSet = readImageSet(testCollection, testCollection, rootpath)
    testImageSet = [x for x in testImageSet if x not in doneset]
    # Round-robin split across jobs; job ids start at 1.
    testImageSet = [img for pos, img in enumerate(testImageSet) if (pos%numjobs+1) == job]
    printStatus(INFO, 'working on %d-%d, %d test images -> %s' % (numjobs,job,len(testImageSet),resultfile) )

    testreader = TagReader(testCollection, rootpath=rootpath)
    test_feat_file = BigFile(os.path.join(rootpath, testCollection, 'FeatureData', feature))
    block_size = 100

    tagranking = TagRanking(trainCollection, feature=feature, k=k, rootpath=rootpath)

    makedirsforfile(resultfile)

    nr_of_images = len(testImageSet)
    # "with" closes the result file on every exit path, including exceptions.
    with open(resultfile, "w") as fw:
        # Stepping by block_size avoids computing a block count with "/",
        # which yields a float (and a TypeError in range) on Python 3.
        for start in range(0, nr_of_images, block_size):
            end = min(nr_of_images, start + block_size)
            subset = testImageSet[start:end]
            renamed, features = test_feat_file.read(subset)
            printStatus(INFO, '%d - %d: %d images' % (start, end, len(subset)))

            output = []
            for idx, qry_id in enumerate(renamed):
                tagvotes = tagranking.estimate(features[idx], testreader.get(qry_id))
                newline = "%s %s" % (qry_id, " ".join(["%s %g" % (x[0],x[1]) for x in tagvotes]))
                output.append(newline + "\n")

            # Write block by block so finished work survives a later failure.
            fw.write("".join(output))
            fw.flush()

    printStatus(INFO, 'done')